diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 896e34f..783bdad 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -38,10 +38,10 @@
   loop:
     - server.rules.yml
     - docker.rules.yml
-    - django.rules.yml
     - ups.rules.yml
     - postgres.rules.yml
     - environmental.rules.yml
+    - ilo.rules.yml
   notify: Restart Prometheus
 
 - name: Make Prometheus snmp-exporter listen on localhost only
diff --git a/roles/prometheus/templates/ilo.rules.yml.j2 b/roles/prometheus/templates/ilo.rules.yml.j2
new file mode 100644
index 0000000..4a0bc37
--- /dev/null
+++ b/roles/prometheus/templates/ilo.rules.yml.j2
@@ -0,0 +1,58 @@
+---
+{{ ansible_managed | comment }}
+
+{% macro raw(string) -%}
+{{ "{{" }} {{ string }} {{ "}}" }}
+{%- endmacro %}
+
+groups:
+
+  - name: ilo.rules
+    rules:
+
+      - alert: IloBiosSelfTestDegraded
+        expr: cpqHeHWBiosCondition >= 3
+        for: 3m
+        labels:
+          severity: critical
+        annotations:
+          summary: >-
+            Une erreur a été détectée lors du POST du serveur
+
+      - alert: IloTemperatureSensorDegraded
+        expr: cpqHeTemperatureCondition >= 3
+        for: 3m
+        labels:
+          severity: critical
+        annotations:
+          summary: >-
+            Le capteur de température est dégradé
+
+      - alert: IloFanDegraded
+        expr: cpqHeFltTolFanCondition >= 3
+        for: 3m
+        labels:
+          severity: critical
+        annotations:
+          summary: >-
+            Le ventilateur est dégradé
+
+      - alert: IloPowerSupplyDegraded
+        expr: cpqHeFltTolPowerSupplyStatus >= 2
+        for: 3m
+        labels:
+          severity: critical
+        annotations:
+          summary: >-
+            L'alimentation est dégradée
+
+      - alert: IloOverrideSwitchState
+        expr: cpqSm2CntlriLOSecurityOverrideSwitchState == 2
+        for: 3m
+        labels:
+          severity: critical
+        annotations:
+          summary: >-
+            Le switch de réinitialisation n'est pas à l'état d'origine
+
+...