Cleanup Prometheus alerts #45

Merged
jeltz merged 11 commits from prometheus_alerts into master 2021-04-01 19:24:26 +02:00
2 changed files with 26 additions and 20 deletions
Showing only changes of commit eeaf0f8486 - Show all commits

View file

@ -2,10 +2,10 @@
{{ ansible_managed | comment }}
{% macro raw(string) -%}
{{ string }}
{{ "{{" }} {{ string }} {{ "}}" }}
{%- endmacro %}
{% set instance = '[{{ raw("$label.instance") }}]' %}
{% set instance = "[{{ $labels.instance }}]" %}
groups:
- name: alert.rules
@ -15,10 +15,10 @@ groups:
expr: up == 0
for: 3m
labels:
severity: critical
severity: critical
annotations:
summary: >-
{{ instance }} Invisible depuis plus de 3 minutes
summary: >-
{{ instance }} Invisible depuis plus de 3 minutes
- alert: OutOfMemory
expr: >-
@ -32,7 +32,8 @@ groups:
severity: warning
annotations:
summary: >-
{{ instance }} {{ raw("$value | round") }}% de mémoire libre
{{ instance }} {{ raw('$value | printf "%.1f"') }}% de mémoire
libre
- alert: OutOfDiskSpace
expr: >-
@ -42,8 +43,8 @@ groups:
severity: warning
annotations:
summary: >-
{{ instance }} Espace libre de {{ raw("$labels.mountpoint") }} sur
à {{ raw("$value | round") }}%
{{ instance }} Espace libre de {{ raw('$labels.mountpoint') }} sur
à {{ raw('$value | printf "%.1f"') }}%
- alert: OutOfInodes
expr: node_filesystem_files_free / node_filesystem_files * 100 < 10
@ -52,8 +53,8 @@ groups:
severity: warning
annotations:
summary: >-
{{ instance }} {{ raw("$value | round") }}% d'inodes restants
pour {{ raw("$labels.mountpoint") }}
{{ instance }} {{ raw('$value | printf "%.1f"') }}% d'inodes
restants pour {{ raw('$labels.mountpoint') }}
- alert: CpuUsage
expr: >-
@ -67,7 +68,7 @@ groups:
severity: warning
annotations:
summary: >-
{{ instance }} CPU à {{ raw("$value | round") }}%
{{ instance }} CPU à {{ raw('$value | printf "%.1f"') }}%
- alert: SystemdServiceFailed
expr: node_systemd_unit_state{state="failed"} == 1
@ -76,7 +77,7 @@ groups:
severity: warning
annotations:
summary: >-
{{ instance }} {{ raw("$label.name") }} a échoué
{{ instance }} {{ raw('$labels.name') }} a échoué
- alert: LoadUsage
expr: node_load1 > 5
@ -85,7 +86,7 @@ groups:
severity: warning
annotations:
summary: >
{{ instance }} Charge à {{ raw("$value") }}
{{ instance }} Charge à {{ raw('$value') }}
- alert: UpsOutputSourceChanged
expr: upsOutputSource != 3
@ -121,7 +122,7 @@ groups:
severity: critical
annotations:
summary: >-
{{ instance }} Charge de {{ raw("$value | round") }}%
{{ instance }} Charge de {{ raw('$value | printf "%.1f"') }}%
- alert: UpsWrongInputVoltage
expr: (upsInputVoltage < 210) or (upsInputVoltage > 250)
@ -130,7 +131,7 @@ groups:
severity: warning
annotations:
summary: >-
{{ instance }} Tension d'entrée de {{ raw("$value") }}V
{{ instance }} Tension d'entrée de {{ raw('$value') }}V
- alert: UpsWrongOutputVoltage
expr: (upsOutputVoltage < 220) or (upsOutputVoltage > 240)
@ -139,7 +140,7 @@ groups:
severity: warning
annotations:
summary: >-
{{ instance }} Tension de sortie de {{ raw("$value") }}V
{{ instance }} Tension de sortie de {{ raw('$value') }}V
- alert: UpsTimeRemainingWarning
expr: upsEstimatedMinutesRemaining < 8
@ -148,7 +149,7 @@ groups:
severity: warning
annotations:
summary: >-
{{ instance }} Autonomie restante de {{ raw("$value") }} min
{{ instance }} Autonomie restante de {{ raw('$value') }} min
- alert: UpsTimeRemainingCritical
expr: upsEstimatedMinutesRemaining < 5
@ -157,5 +158,5 @@ groups:
severity: critical
annotations:
summary: >-
{{ instance }} Autonomie restante de {{ raw("$value") }} min
{{ instance }} Autonomie restante de {{ raw('$value') }} min
...

View file

@ -1,6 +1,12 @@
---
{{ ansible_managed | comment }}
{% macro raw(string) -%}
{{ "{{" }} {{ string }} {{ "}}" }}
{%- endmacro %}
{% set instance = "[{{ $labels.instance }}]" %}
groups:
- name: alert.rules
rules:
@ -11,6 +17,5 @@ groups:
severity: critical
annotations:
summary: >-
Federate : {{ "{{" }} $labels.instance {{ "}}" }} est invisible
depuis plus de 3 minutes !
{{ instance }} Invisible depuis plus de 3 minutes
...