From f50778ca96e622c349533a455fc596b6eb13cfe8 Mon Sep 17 00:00:00 2001 From: Alexandre Iooss Date: Fri, 31 Dec 2021 14:44:50 +0100 Subject: [PATCH] prometheus: commit production alert configuration --- roles/prometheus/templates/postgres.rules.yml.j2 | 2 +- roles/prometheus/templates/server.rules.yml.j2 | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/roles/prometheus/templates/postgres.rules.yml.j2 b/roles/prometheus/templates/postgres.rules.yml.j2 index 0ec4952..aa24537 100644 --- a/roles/prometheus/templates/postgres.rules.yml.j2 +++ b/roles/prometheus/templates/postgres.rules.yml.j2 @@ -112,7 +112,7 @@ groups: rate(pg_stat_database_xact_rollback{datname!~"template.*"}[3m]) / rate(pg_stat_database_xact_commit{datname!~"template.*"}[3m]) ) * 100 - > 7 + > 20 for: 0m labels: severity: warning diff --git a/roles/prometheus/templates/server.rules.yml.j2 b/roles/prometheus/templates/server.rules.yml.j2 index 5277fdf..457e5e8 100644 --- a/roles/prometheus/templates/server.rules.yml.j2 +++ b/roles/prometheus/templates/server.rules.yml.j2 @@ -10,23 +10,17 @@ groups: - name: server.rules rules: - - alert: InstanceDown + - alert: MachineDown expr: up{instance!~".*.borne.auro.re$"} == 0 for: 3m labels: severity: critical - annotations: - summary: >- - Invisible depuis plus de 3 minutes - alert: AccessPointDown expr: up{instance=~".*.borne.auro.re$"} == 0 for: 3m labels: severity: warning - annotations: - summary: >- - Invisible depuis plus de 3 minutes - alert: OutOfMemory expr: >- @@ -50,7 +44,7 @@ groups: node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes ) - ) * 100 >= 20 + ) * 100 >= 50 for: 3m labels: severity: warning