prometheus_snmp: Add alerts on Quanta system

This commit is contained in:
v-lafeychine 2023-11-01 18:34:17 +01:00
parent 190f31dffd
commit 51674bc1f6
Signed by: v-lafeychine
GPG key ID: F46CAAD27C7AB0D5

View file

@ -236,10 +236,67 @@ prometheus__alert_rules_bird:
# Alert rules for Quanta switches, built from SNMP metrics.
# Placeholders such as '$value' and '$labels.<name>' are passed through the
# `interp` filter wherever they must reach Prometheus as template expressions.
# NOTE(review): `interp` is defined outside this excerpt; it presumably wraps
# its input in Prometheus template delimiters. Confirm against the filter.
prometheus__alert_rules_quanta:
  # Fires as soon as the queue-overflow flag equals 1.
  - alert: QuantaQueueOverflow
    expr: 'snAgGblQueueOverflow == 1'
    for: 0m
    labels:
      severity: critical

  # CPU usage, two tiers: above 50 is a warning, above 80 is critical.
  - alert: QuantaCpuUsage
    expr: 'snAgGblCpuUtil1MinAvg > 50'
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "Utilisation forte du processeur ({{ '$value' | interp }}%)"
  - alert: QuantaCpuUsage
    expr: 'snAgGblCpuUtil1MinAvg > 80'
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "Utilisation intense du processeur ({{ '$value' | interp }}%)"

  # Memory usage in percent, computed as 100 * (1 - free / total).
  - alert: QuantaMemoryUsage
    expr: '100 * (1 - (snAgGblDynMemFree / snAgGblDynMemTotal)) > 50'
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "Utilisation forte de la mémoire ({{ '$value' | interp }}%)"
  - alert: QuantaMemoryUsage
    expr: '100 * (1 - (snAgGblDynMemFree / snAgGblDynMemTotal)) > 80'
    for: 5m
    labels:
      # Same severity vocabulary as the other rules (warning / critical).
      severity: critical
    annotations:
      summary: "Utilisation intense de la mémoire ({{ '$value' | interp }}%)"

  # Fan health. Selecting the non-"normal" state series that is currently
  # active (== 1) keeps the real state in the snChasFanOperStatus label, so
  # the summary reports the actual mode instead of always saying "normal".
  # NOTE(review): assumes the exporter emits one 0/1 series per enum state;
  # confirm against the SNMP exporter module configuration.
  - alert: QuantaFanHealth
    expr: 'snChasFanOperStatus{snChasFanOperStatus!="normal"} == 1'
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: "Le ventilateur {{ '$labels.snChasFanDescription' | interp }} est
        en mode {{ '$labels.snChasFanOperStatus' | interp }}"

  # Temperature, two tiers on the raw value divided by 2.
  # NOTE(review): the division suggests the raw metric is in half-degree
  # units; confirm against the device MIB.
  - alert: QuantaTemp
    expr: '(snAgentTempValue / 2) > 45'
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: "La température de
        {{ '$labels.snAgentTempSensorDescr' | interp }} est
        élevée ({{ '$value' | interp }}°C)"
  - alert: QuantaTemp
    expr: '(snAgentTempValue / 2) > 60'
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: "La température de
        {{ '$labels.snAgentTempSensorDescr' | interp }} est
        très élevée ({{ '$value' | interp }}°C)"

  # Power redundancy: fewer than two supplies in the "normal" state.
  # `sum` adds up the 0/1 state values, so it counts only supplies that are
  # really normal and still yields 0 (and fires) when none is.
  - alert: QuantaPowerRedundancyFailure
    expr: 'sum by (instance) (snChasPwrSupplyOperStatus{snChasPwrSupplyOperStatus="normal"}) < 2'
    for: 0m
    labels:
      severity: warning
prometheus__alert_rules: prometheus__alert_rules:
prometheus: "{{ prometheus__alert_rules_prometheus }}" prometheus: "{{ prometheus__alert_rules_prometheus }}"