162 lines
4.1 KiB
Django/Jinja
162 lines
4.1 KiB
Django/Jinja
---
|
|
{{ ansible_managed | comment }}
|
|
|
|
{% macro raw(string) -%}
  {#- Emit `string` wrapped in literal double curly braces, so that this
      Jinja render leaves a template expression (e.g. $value or $labels.x)
      in the output instead of evaluating it here. Whitespace control on
      every tag keeps the result free of surrounding padding. -#}
  {{ "{{ " ~ string ~ " }}" }}
{%- endmacro %}
|
|
|
|
groups:
|
|
- name: alert.rules
|
|
rules:
|
|
|
|
# Critical alert: fires once `up == 0` has held for 3 minutes.
- alert: InstanceDown
  expr: >-
    up == 0
  for: 3m
  labels:
    severity: critical
  annotations:
    summary: Invisible depuis plus de 3 minutes
|
|
|
|
# Warning: free + cached + buffered memory has stayed below 10 percent of
# total memory for 5 minutes.
- alert: OutOfMemory
  expr: >-
    (node_memory_MemFree_bytes
    + node_memory_Cached_bytes
    + node_memory_Buffers_bytes)
    / node_memory_MemTotal_bytes * 100 < 10
  for: 5m
  labels:
    severity: warning
  annotations:
    # raw() keeps the value placeholder unevaluated in the rendered file.
    summary: >-
      {{ raw('$value | printf "%.1f"') }}% de mémoire libre
|
|
|
|
# Warning: the free/size ratio of a filesystem has stayed below 10 percent
# for 5 minutes.
- alert: OutOfDiskSpace
  expr: node_filesystem_free_bytes / node_filesystem_size_bytes * 100 < 10
  for: 5m
  labels:
    severity: warning
  annotations:
    # Reports the remaining percentage and the mountpoint label.
    summary: >-
      {{ raw('$value | printf "%.1f"') }}% d'espace libre
      pour {{ raw('$labels.mountpoint') }}
|
|
|
|
# Warning: the share of free inodes on a filesystem has stayed below
# 10 percent for 5 minutes.
- alert: OutOfInodes
  expr: >-
    node_filesystem_files_free / node_filesystem_files * 100 < 10
  for: 5m
  labels:
    severity: warning
  annotations:
    # Reports the remaining percentage and the mountpoint label.
    summary: >-
      {{ raw('$value | printf "%.1f"') }}% d'inodes restants
      pour {{ raw('$labels.mountpoint') }}
|
|
|
|
# Warning: per-instance CPU usage (100 minus the average idle percentage)
# has stayed above 75 percent for 10 minutes.
- alert: CpuUsage
  # rate() is used rather than irate(): irate() only looks at the last two
  # samples of the window, so a single quiet scrape can drop the value below
  # the threshold and restart the 10-minute `for` timer.
  expr: >-
    (
    100 - avg by (instance) (
    rate(node_cpu_seconds_total{mode="idle"}[5m])
    ) * 100
    ) > 75
  for: 10m
  labels:
    severity: warning
  annotations:
    # The comparison keeps the left-hand value, i.e. the usage percentage.
    summary: >-
      CPU à {{ raw('$value | printf "%.1f"') }}%
|
|
|
|
# Warning: a systemd unit has reported state="failed" for 10 minutes.
- alert: SystemdServiceFailed
  expr: >-
    node_systemd_unit_state{state="failed"} == 1
  for: 10m
  labels:
    severity: warning
  annotations:
    # The `name` label is printed as the failing unit.
    summary: >-
      {{ raw('$labels.name') }} a échoué
|
|
|
|
# Warning: the 1-minute load average has stayed above 5 for 2 minutes.
- alert: LoadUsage
  expr: node_load1 > 5
  for: 2m
  labels:
    severity: warning
  annotations:
    # Folded style with the strip indicator (>-), as in the other rules of
    # this file, so the annotation does not end with a stray newline.
    summary: >-
      Charge à {{ raw('$value') }}
|
|
|
|
# Critical: upsOutputSource has differed from 3 for 1 minute.
# NOTE(review): 3 is presumably the "normal" source in the UPS-MIB
# enumeration; confirm against the exporter's mapping.
- alert: UpsOutputSourceChanged
  expr: >-
    upsOutputSource != 3
  for: 1m
  labels:
    severity: critical
  annotations:
    summary: Source d'alimentation changée
|
|
|
|
# Warning: upsBatteryStatus has been equal to 3 for 2 minutes.
# NOTE(review): 3 presumably means "battery low" in the UPS-MIB
# enumeration; confirm against the exporter's mapping.
- alert: UpsBatteryStatus
  expr: >-
    upsBatteryStatus == 3
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: État de la batterie faible
|
|
|
|
# Critical: upsBatteryStatus has been equal to 4 for 10 minutes.
# NOTE(review): 4 presumably means "battery depleted" in the UPS-MIB
# enumeration; confirm against the exporter's mapping.
- alert: UpsBatteryStatus
  expr: >-
    upsBatteryStatus == 4
  for: 10m
  labels:
    severity: critical
  annotations:
    summary: État de la batterie critique
|
|
|
|
# Critical: upsOutputPercentLoad has stayed above 70 for 5 minutes.
- alert: UpsHighLoad
  expr: >-
    upsOutputPercentLoad > 70
  for: 5m
  labels:
    severity: critical
  annotations:
    # The load value is printed with one decimal place.
    summary: >-
      Charge de {{ raw('$value | printf "%.1f"') }}%
|
|
|
|
# Warning: upsInputVoltage has been outside the 210-250 range for
# 10 minutes.
- alert: UpsWrongInputVoltage
  expr: >-
    (upsInputVoltage < 210)
    or (upsInputVoltage > 250)
  for: 10m
  labels:
    severity: warning
  annotations:
    summary: >-
      Tension d'entrée de {{ raw('$value') }}V
|
|
|
|
# Warning: the UPS output voltage has deviated from its one-day average by
# more than three one-day standard deviations for 10 minutes.
- alert: UpsWrongOutputVoltage
  # The deviation is computed on upsOutputVoltage itself and must EXCEED
  # the 3-sigma band; comparing with `<` would fire during normal operation.
  # The leading `upsOutputVoltage and (...)` keeps the measured voltage as
  # the alert value, which is what the summary prints.
  expr: >-
    upsOutputVoltage and (
    abs(upsOutputVoltage - avg_over_time(upsOutputVoltage[1d]))
    > 3 * stddev_over_time(upsOutputVoltage[1d])
    )
  for: 10m
  labels:
    severity: warning
  annotations:
    summary: >-
      Tension de sortie de {{ raw('$value') }}V
|
|
|
|
# Warning tier: upsEstimatedMinutesRemaining has been below 8 for 1 minute.
- alert: UpsTimeRemaining
  expr: >-
    upsEstimatedMinutesRemaining < 8
  for: 1m
  labels:
    severity: warning
  annotations:
    summary: >-
      Autonomie restante de {{ raw('$value') }} min
|
|
|
|
# Critical tier: upsEstimatedMinutesRemaining has been below 5 for 1 minute.
- alert: UpsTimeRemaining
  expr: >-
    upsEstimatedMinutesRemaining < 5
  for: 1m
  labels:
    severity: critical
  annotations:
    summary: >-
      Autonomie restante de {{ raw('$value') }} min
|
|
...
|