prometheus: add quanta alerts

This commit is contained in:
jeltz 2023-11-01 18:56:44 +01:00
parent 4c33b77695
commit 9e483d5285
Signed by: jeltz
GPG key ID: 800882B66C0C3326

View file

@ -40,14 +40,16 @@ prometheus__alert_rules_prometheus:
labels:
severity: critical
prometheus__alert_rules_node:
- alert: MachineDown
expr: "up == 0"
# Rule list referenced as `common` from the prometheus__alert_rules mapping.
prometheus__alert_rules_common:
# CollectorDown: critical alert raised once `up == 0` has held for 3 minutes.
# `up` is the scrape-health series Prometheus records per target
# (0 when the last scrape failed).
- alert: CollectorDown
expr: 'up == 0'
for: 3m
labels:
severity: critical
annotations:
# `interp` is not a stock Jinja/Ansible filter; presumably a project filter
# that turns '$labels.job' into a Prometheus template expression so Ansible
# does not try to evaluate it itself — TODO confirm.
# NOTE(review): the message only names the job ("Collecteur <job>") and does
# not state that the collector is unreachable — consider completing it.
summary: "Collecteur {{ '$labels.job' | interp }}"
prometheus__alert_rules_node:
- alert: OutOfMemory
expr: "( node_memory_MemFree_bytes
+ node_memory_Cached_bytes
@ -282,16 +284,16 @@ prometheus__alert_rules_quanta:
labels:
severity: warning
annotations:
summary: "La température de {{ '$labels.snAgentTempSensorDescr' }} est
élevée ({{ '$value' | interp }}°C)"
summary: "La température de {{ '$labels.snAgentTempSensorDescr' | interp }}
est élevée ({{ '$value' | interp }}°C)"
# QuantaTemp (critical): fires as soon as snAgentTempValue / 2 exceeds 60;
# `for: 0m` means there is no hold period before the alert is raised.
- alert: QuantaTemp
# The division by 2 presumably converts a half-degree raw reading to °C
# (the summary prints the result as °C) — confirm against the device MIB.
expr: '(snAgentTempValue / 2) > 60'
for: 0m
labels:
severity: critical
annotations:
# NOTE(review): this view looks like a rendered diff with its +/- markers
# stripped. The first `summary` below (no `| interp` on the label) appears to
# be the removed version and the second the added one — confirm against the
# real file, since two `summary` keys in one mapping would be a duplicate key.
summary: "La température de {{ '$labels.snAgentTempSensorDescr' }} est
très élevée ({{ '$value' | interp }}°C)"
# `interp` is presumably a project-defined filter that emits Prometheus
# template syntax for the quoted variable — TODO confirm.
summary: "La température de {{ '$labels.snAgentTempSensorDescr' | interp }}
est très élevée ({{ '$value' | interp }}°C)"
- alert: QuantaPowerRedundancyFailure
expr: 'count by (instance) (snChasPwrSupplyOperStatus{snChasPwrSupplyOperStatus="normal"}) < 2'
for: 0m
@ -299,6 +301,7 @@ prometheus__alert_rules_quanta:
severity: warning
prometheus__alert_rules:
common: "{{ prometheus__alert_rules_common }}"
prometheus: "{{ prometheus__alert_rules_prometheus }}"
node: "{{ prometheus__alert_rules_node }}"
keepalived: "{{ prometheus__alert_rules_keepalived }}"