Separate AP/servers down alerts and add temperature alerts #46
1 changed file with 7 additions and 3 deletions
|
@@ -59,12 +59,14 @@ groups:
|
||||||
La mémoire swap est utilisée à {{ raw('$value | printf "%.1f"') }}%
|
La mémoire swap est utilisée à {{ raw('$value | printf "%.1f"') }}%
|
||||||
|
|
||||||
- alert: HostPhysicalComponentTooHot
|
- alert: HostPhysicalComponentTooHot
|
||||||
expr: node_hwmon_temp_celsius > 75
|
expr: node_hwmon_temp_celsius > 79
|
||||||
for: 3m
|
for: 3m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
summary: La température de l'hôte est de {{ raw('$value') }}°C
|
summary: >-
|
||||||
|
La température de l'hôte est de {{ raw('$value') }}°C
|
||||||
|
({{ raw('$labels.chip') }}, {{ raw('$labels.sensor') }})
|
||||||
|
|
||||||
- alert: HostNodeOvertemperatureAlarm
|
- alert: HostNodeOvertemperatureAlarm
|
||||||
expr: node_hwmon_temp_crit_alarm_celsius == 1
|
expr: node_hwmon_temp_crit_alarm_celsius == 1
|
||||||
|
@@ -72,7 +74,9 @@ groups:
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
summary: L'alarme de température de l'hôte est active
|
summary: >-
|
||||||
|
L'alarme de température de l'hôte est active
|
||||||
|
({{ raw('$labels.chip') }}, {{ raw('$labels.sensor') }})
|
||||||
|
|
||||||
- alert: HostOomKillDetected
|
- alert: HostOomKillDetected
|
||||||
expr: increase(node_vmstat_oom_kill[1m]) > 0
|
expr: increase(node_vmstat_oom_kill[1m]) > 0
|
||||||
|
|
Loading…
Reference in a new issue