Cleanup Prometheus alerts #45

Merged
jeltz merged 11 commits from prometheus_alerts into master 2021-04-01 19:24:26 +02:00
2 changed files with 20 additions and 24 deletions
Showing only changes of commit 5bcc428895 - Show all commits

View file

@@ -5,8 +5,6 @@
{{ "{{" }} {{ string }} {{ "}}" }} {{ "{{" }} {{ string }} {{ "}}" }}
{%- endmacro %} {%- endmacro %}
{% set instance = "[{{ $labels.instance }}]" %}
groups: groups:
- name: alert.rules - name: alert.rules
rules: rules:
@@ -18,7 +16,7 @@ groups:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
{{ instance }} Invisible depuis plus de 3 minutes Invisible depuis plus de 3 minutes
- alert: OutOfMemory - alert: OutOfMemory
expr: >- expr: >-
@@ -32,7 +30,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} {{ raw('$value | printf "%.1f"') }}% de mémoire {{ raw('$value | printf "%.1f"') }}% de mémoire
libre libre
- alert: OutOfDiskSpace - alert: OutOfDiskSpace
@@ -43,7 +41,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} Espace libre de {{ raw('$labels.mountpoint') }} sur Espace libre de {{ raw('$labels.mountpoint') }} sur
à {{ raw('$value | printf "%.1f"') }}% à {{ raw('$value | printf "%.1f"') }}%
- alert: OutOfInodes - alert: OutOfInodes
@@ -53,7 +51,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} {{ raw('$value | printf "%.1f"') }}% d'inodes {{ raw('$value | printf "%.1f"') }}% d'inodes
restants pour {{ raw('$labels.mountpoint') }} restants pour {{ raw('$labels.mountpoint') }}
- alert: CpuUsage - alert: CpuUsage
@@ -68,7 +66,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} CPU à {{ raw('$value | printf "%.1f"') }}% CPU à {{ raw('$value | printf "%.1f"') }}%
- alert: SystemdServiceFailed - alert: SystemdServiceFailed
expr: node_systemd_unit_state{state="failed"} == 1 expr: node_systemd_unit_state{state="failed"} == 1
@@ -77,7 +75,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} {{ raw('$labels.name') }} a échoué {{ raw('$labels.name') }} a échoué
- alert: LoadUsage - alert: LoadUsage
expr: node_load1 > 5 expr: node_load1 > 5
@@ -86,7 +84,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: > summary: >
{{ instance }} Charge à {{ raw('$value') }} Charge à {{ raw('$value') }}
- alert: UpsOutputSourceChanged - alert: UpsOutputSourceChanged
expr: upsOutputSource != 3 expr: upsOutputSource != 3
@@ -95,25 +93,25 @@ groups:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
{{ instance }} Source d'alimentation changée Source d'alimentation changée
- alert: UpsBatteryStatusWarning - alert: UpsBatteryStatus
expr: upsBatteryStatus == 3 expr: upsBatteryStatus == 3
for: 2m for: 2m
labels: labels:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} État de la batterie faible État de la batterie faible
- alert: UpsBatteryStatusCritical - alert: UpsBatteryStatus
expr: upsBatteryStatus == 4 expr: upsBatteryStatus == 4
for: 10m for: 10m
labels: labels:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
{{ instance }} État de la batterie critique État de la batterie critique
- alert: UpsHighLoad - alert: UpsHighLoad
expr: upsOutputPercentLoad > 70 expr: upsOutputPercentLoad > 70
@@ -122,7 +120,7 @@ groups:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
{{ instance }} Charge de {{ raw('$value | printf "%.1f"') }}% Charge de {{ raw('$value | printf "%.1f"') }}%
- alert: UpsWrongInputVoltage - alert: UpsWrongInputVoltage
expr: (upsInputVoltage < 210) or (upsInputVoltage > 250) expr: (upsInputVoltage < 210) or (upsInputVoltage > 250)
@@ -131,7 +129,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} Tension d'entrée de {{ raw('$value') }}V Tension d'entrée de {{ raw('$value') }}V
- alert: UpsWrongOutputVoltage - alert: UpsWrongOutputVoltage
expr: (upsOutputVoltage < 220) or (upsOutputVoltage > 240) expr: (upsOutputVoltage < 220) or (upsOutputVoltage > 240)
@@ -140,23 +138,23 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} Tension de sortie de {{ raw('$value') }}V Tension de sortie de {{ raw('$value') }}V
- alert: UpsTimeRemainingWarning - alert: UpsTimeRemaining
expr: upsEstimatedMinutesRemaining < 8 expr: upsEstimatedMinutesRemaining < 8
for: 1m for: 1m
labels: labels:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} Autonomie restante de {{ raw('$value') }} min Autonomie restante de {{ raw('$value') }} min
- alert: UpsTimeRemainingCritical - alert: UpsTimeRemaining
expr: upsEstimatedMinutesRemaining < 5 expr: upsEstimatedMinutesRemaining < 5
for: 1m for: 1m
labels: labels:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
{{ instance }} Autonomie restante de {{ raw('$value') }} min Autonomie restante de {{ raw('$value') }} min
... ...

View file

@@ -5,8 +5,6 @@
{{ "{{" }} {{ string }} {{ "}}" }} {{ "{{" }} {{ string }} {{ "}}" }}
{%- endmacro %} {%- endmacro %}
{% set instance = "[{{ $labels.instance }}]" %}
groups: groups:
- name: alert.rules - name: alert.rules
rules: rules:
@@ -17,5 +15,5 @@ groups:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
{{ instance }} Invisible depuis plus de 3 minutes Invisible depuis plus de 3 minutes
... ...