From 9d18ebb7f14a5b4f543ed9a9066711c46877620b Mon Sep 17 00:00:00 2001 From: pz2891 Date: Sun, 11 Apr 2021 17:18:32 +0200 Subject: [PATCH] Fix docker rules ContainerDown now fires only when docker_container_running_state != 1. The restart-count alert is aggregated by (instance, name) so the labels used in its summary survive the sum(). Malformed raw() calls in the alert summaries are corrected. --- roles/prometheus/tasks/main.yml | 1 + .../prometheus/templates/docker.rules.yml.j2 | 74 +++++++++---------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml index 075da46..0c76907 100644 --- a/roles/prometheus/tasks/main.yml +++ b/roles/prometheus/tasks/main.yml @@ -33,6 +33,7 @@ - alerts.rules.yml - django.rules.yml + - name: Configure Prometheus alerts template: src: "{{ item }}.j2" diff --git a/roles/prometheus/templates/docker.rules.yml.j2 b/roles/prometheus/templates/docker.rules.yml.j2 index 8ccc565..ce825ad 100644 --- a/roles/prometheus/templates/docker.rules.yml.j2 +++ b/roles/prometheus/templates/docker.rules.yml.j2 @@ -10,41 +10,41 @@ groups: - name: docker.rules rules: - - alert: ContainerDown - expr: docker_container_running_state - for: 0m - labels: - severity: critical - annotations: - summary: >- - Container Docker mort - (instance {{ raw('$labels.instance') }}, container {{ raw('$labels.name') }}) - - - alert: ContainerFailed - expr: sum(increase(docker_container_restart_count[5m])) > 2 - for: 0m - labels: - severity: critical - annotations: - summary: >- - Container Docker redémarre souvent - (instance raw('{{ $labels.instance') }}, container {{ raw('$labels.name') }}) - - - alert: ContainerFailed - expr: - ( - docker_container_cpu_used_total - / - docker_container_cpu_capacity_total - ) * 100 - > 30 - for: 0m - labels: - severity: critical - annotations: - summary: >- - Container Docker utilise beaucoup de CPU - (instance {{ raw('$labels.instance') }}, container {{ raw('$labels.name') }}, - value {{ raw('$value | printf "%.1f"'') }}) - + - alert: ContainerDown + expr: docker_container_running_state != 1 + for: 0m + labels: + severity: critical + annotations: + summary: >- + Container Docker mort + (instance {{ 
raw('$labels.instance') }}, container {{ raw('$labels.name') }}) + + - alert: ContainerFailed + expr: sum by (instance, name) (increase(docker_container_restart_count[5m])) > 2 + for: 0m + labels: + severity: critical + annotations: + summary: >- + Container Docker redémarre souvent + (instance {{ raw('$labels.instance') }}, container {{ raw('$labels.name') }}) + + - alert: ContainerFailed + expr: + ( + docker_container_cpu_used_total + / + docker_container_cpu_capacity_total + ) * 100 + > 30 + for: 0m + labels: + severity: critical + annotations: + summary: >- + Container Docker utilise beaucoup de CPU + (instance {{ raw('$labels.instance') }}, container {{ raw('$labels.name') }}, + value {{ raw('$value | printf "%.1f"') }}) + ...