From 9e483d5285d9f990d97b7daffc8743316b10e371 Mon Sep 17 00:00:00 2001 From: Jeltz Date: Wed, 1 Nov 2023 18:56:44 +0100 Subject: [PATCH] prometheus: add quanta alerts --- group_vars/prom/prometheus.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/group_vars/prom/prometheus.yml b/group_vars/prom/prometheus.yml index 0d6bf5c..7870f07 100644 --- a/group_vars/prom/prometheus.yml +++ b/group_vars/prom/prometheus.yml @@ -40,14 +40,16 @@ prometheus__alert_rules_prometheus: labels: severity: critical -prometheus__alert_rules_node: - - alert: MachineDown - expr: "up == 0" +prometheus__alert_rules_common: + - alert: CollectorDown + expr: 'up == 0' for: 3m labels: severity: critical annotations: summary: "Collecteur {{ '$labels.job' | interp }}" + +prometheus__alert_rules_node: - alert: OutOfMemory expr: "( node_memory_MemFree_bytes + node_memory_Cached_bytes @@ -282,16 +284,16 @@ prometheus__alert_rules_quanta: labels: severity: warning annotations: - summary: "La température de {{ '$labels.snAgentTempSensorDescr' }} est - élevée ({{ '$value' | interp }}°C)" + summary: "La température de {{ '$labels.snAgentTempSensorDescr' | interp }} + est élevée ({{ '$value' | interp }}°C)" - alert: QuantaTemp expr: '(snAgentTempValue / 2) > 60' for: 0m labels: severity: critical annotations: - summary: "La température de {{ '$labels.snAgentTempSensorDescr' }} est - très élevée ({{ '$value' | interp }}°C)" + summary: "La température de {{ '$labels.snAgentTempSensorDescr' | interp }} + est très élevée ({{ '$value' | interp }}°C)" - alert: QuantaPowerRedundancyFailure expr: 'count by (instance) (snChasPwrSupplyOperStatus{snChasPwrSupplyOperStatus="normal"}) < 2' for: 0m @@ -299,6 +301,7 @@ prometheus__alert_rules_quanta: severity: warning prometheus__alert_rules: + common: "{{ prometheus__alert_rules_common }}" prometheus: "{{ prometheus__alert_rules_prometheus }}" node: "{{ prometheus__alert_rules_node }}" keepalived: "{{ prometheus__alert_rules_keepalived }}"