Clean up Prometheus alerts #45

Merged
jeltz merged 11 commits from prometheus_alerts into master 2021-04-01 19:24:26 +02:00
2 changed files with 26 additions and 20 deletions
Showing only changes of commit eeaf0f8486 - Show all commits

View file

@ -2,10 +2,10 @@
{{ ansible_managed | comment }} {{ ansible_managed | comment }}
{% macro raw(string) -%} {% macro raw(string) -%}
{{ string }} {{ "{{" }} {{ string }} {{ "}}" }}
{%- endmacro %} {%- endmacro %}
{% set instance = '[{{ raw("$label.instance") }}]' %} {% set instance = "[{{ $labels.instance }}]" %}
groups: groups:
- name: alert.rules - name: alert.rules
@ -32,7 +32,8 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} {{ raw("$value | round") }}% de mémoire libre {{ instance }} {{ raw('$value | printf "%.1f"') }}% de mémoire
libre
- alert: OutOfDiskSpace - alert: OutOfDiskSpace
expr: >- expr: >-
@ -42,8 +43,8 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} Espace libre de {{ raw("$labels.mountpoint") }} sur {{ instance }} Espace libre de {{ raw('$labels.mountpoint') }} sur
à {{ raw("$value | round") }}% à {{ raw('$value | printf "%.1f"') }}%
- alert: OutOfInodes - alert: OutOfInodes
expr: node_filesystem_files_free / node_filesystem_files * 100 < 10 expr: node_filesystem_files_free / node_filesystem_files * 100 < 10
@ -52,8 +53,8 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} {{ raw("$value | round") }}% d'inodes restants {{ instance }} {{ raw('$value | printf "%.1f"') }}% d'inodes
pour {{ raw("$labels.mountpoint") }} restants pour {{ raw('$labels.mountpoint') }}
- alert: CpuUsage - alert: CpuUsage
expr: >- expr: >-
@ -67,7 +68,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} CPU à {{ raw("$value | round") }}% {{ instance }} CPU à {{ raw('$value | printf "%.1f"') }}%
- alert: SystemdServiceFailed - alert: SystemdServiceFailed
expr: node_systemd_unit_state{state="failed"} == 1 expr: node_systemd_unit_state{state="failed"} == 1
@ -76,7 +77,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} {{ raw("$label.name") }} a échoué {{ instance }} {{ raw('$labels.name') }} a échoué
- alert: LoadUsage - alert: LoadUsage
expr: node_load1 > 5 expr: node_load1 > 5
@ -85,7 +86,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: > summary: >
{{ instance }} Charge à {{ raw("$value") }} {{ instance }} Charge à {{ raw('$value') }}
- alert: UpsOutputSourceChanged - alert: UpsOutputSourceChanged
expr: upsOutputSource != 3 expr: upsOutputSource != 3
@ -121,7 +122,7 @@ groups:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
{{ instance }} Charge de {{ raw("$value | round") }}% {{ instance }} Charge de {{ raw('$value | printf "%.1f"') }}%
- alert: UpsWrongInputVoltage - alert: UpsWrongInputVoltage
expr: (upsInputVoltage < 210) or (upsInputVoltage > 250) expr: (upsInputVoltage < 210) or (upsInputVoltage > 250)
@ -130,7 +131,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} Tension d'entrée de {{ raw("$value") }}V {{ instance }} Tension d'entrée de {{ raw('$value') }}V
- alert: UpsWrongOutputVoltage - alert: UpsWrongOutputVoltage
expr: (upsOutputVoltage < 220) or (upsOutputVoltage > 240) expr: (upsOutputVoltage < 220) or (upsOutputVoltage > 240)
@ -139,7 +140,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} Tension de sortie de {{ raw("$value") }}V {{ instance }} Tension de sortie de {{ raw('$value') }}V
- alert: UpsTimeRemainingWarning - alert: UpsTimeRemainingWarning
expr: upsEstimatedMinutesRemaining < 8 expr: upsEstimatedMinutesRemaining < 8
@ -148,7 +149,7 @@ groups:
severity: warning severity: warning
annotations: annotations:
summary: >- summary: >-
{{ instance }} Autonomie restante de {{ raw("$value") }} min {{ instance }} Autonomie restante de {{ raw('$value') }} min
- alert: UpsTimeRemainingCritical - alert: UpsTimeRemainingCritical
expr: upsEstimatedMinutesRemaining < 5 expr: upsEstimatedMinutesRemaining < 5
@ -157,5 +158,5 @@ groups:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
{{ instance }} Autonomie restante de {{ raw("$value") }} min {{ instance }} Autonomie restante de {{ raw('$value') }} min
... ...

View file

@ -1,6 +1,12 @@
--- ---
{{ ansible_managed | comment }} {{ ansible_managed | comment }}
{% macro raw(string) -%}
{{ "{{" }} {{ string }} {{ "}}" }}
{%- endmacro %}
{% set instance = "[{{ $labels.instance }}]" %}
groups: groups:
- name: alert.rules - name: alert.rules
rules: rules:
@ -11,6 +17,5 @@ groups:
severity: critical severity: critical
annotations: annotations:
summary: >- summary: >-
Federate : {{ "{{" }} $labels.instance {{ "}}" }} est invisible {{ instance }} Invisible depuis plus de 3 minutes
depuis plus de 3 minutes !
... ...