prometheus: add quanta alerts

This commit is contained in:
jeltz 2023-11-01 18:56:44 +01:00
parent 4c33b77695
commit 9e483d5285
Signed by: jeltz
GPG key ID: 800882B66C0C3326

View file

@ -40,14 +40,16 @@ prometheus__alert_rules_prometheus:
labels: labels:
severity: critical severity: critical
prometheus__alert_rules_node: prometheus__alert_rules_common:
- alert: MachineDown - alert: CollectorDown
expr: "up == 0" expr: 'up == 0'
for: 3m for: 3m
labels: labels:
severity: critical severity: critical
annotations: annotations:
summary: "Collecteur {{ '$labels.job' | interp }}" summary: "Collecteur {{ '$labels.job' | interp }}"
prometheus__alert_rules_node:
- alert: OutOfMemory - alert: OutOfMemory
expr: "( node_memory_MemFree_bytes expr: "( node_memory_MemFree_bytes
+ node_memory_Cached_bytes + node_memory_Cached_bytes
@ -282,16 +284,16 @@ prometheus__alert_rules_quanta:
labels: labels:
severity: warning severity: warning
annotations: annotations:
summary: "La température de {{ '$labels.snAgentTempSensorDescr' }} est summary: "La température de {{ '$labels.snAgentTempSensorDescr' | interp }}
élevée ({{ '$value' | interp }}°C)" est élevée ({{ '$value' | interp }}°C)"
- alert: QuantaTemp - alert: QuantaTemp
expr: '(snAgentTempValue / 2) > 60' expr: '(snAgentTempValue / 2) > 60'
for: 0m for: 0m
labels: labels:
severity: critical severity: critical
annotations: annotations:
summary: "La température de {{ '$labels.snAgentTempSensorDescr' }} est summary: "La température de {{ '$labels.snAgentTempSensorDescr' | interp }}
très élevée ({{ '$value' | interp }}°C)" est très élevée ({{ '$value' | interp }}°C)"
- alert: QuantaPowerRedundancyFailure - alert: QuantaPowerRedundancyFailure
expr: 'count by (instance) (snChasPwrSupplyOperStatus{snChasPwrSupplyOperStatus="normal"}) < 2' expr: 'count by (instance) (snChasPwrSupplyOperStatus{snChasPwrSupplyOperStatus="normal"}) < 2'
for: 0m for: 0m
@ -299,6 +301,7 @@ prometheus__alert_rules_quanta:
severity: warning severity: warning
prometheus__alert_rules: prometheus__alert_rules:
common: "{{ prometheus__alert_rules_common }}"
prometheus: "{{ prometheus__alert_rules_prometheus }}" prometheus: "{{ prometheus__alert_rules_prometheus }}"
node: "{{ prometheus__alert_rules_node }}" node: "{{ prometheus__alert_rules_node }}"
keepalived: "{{ prometheus__alert_rules_keepalived }}" keepalived: "{{ prometheus__alert_rules_keepalived }}"