Prometheus: cleanup #90
2 changed files with 59 additions and 1 deletion
|
@@ -38,10 +38,10 @@
|
||||||
loop:
|
loop:
|
||||||
- server.rules.yml
|
- server.rules.yml
|
||||||
- docker.rules.yml
|
- docker.rules.yml
|
||||||
- django.rules.yml
|
|
||||||
- ups.rules.yml
|
- ups.rules.yml
|
||||||
- postgres.rules.yml
|
- postgres.rules.yml
|
||||||
- environmental.rules.yml
|
- environmental.rules.yml
|
||||||
|
- ilo.rules.yml
|
||||||
notify: Restart Prometheus
|
notify: Restart Prometheus
|
||||||
|
|
||||||
- name: Make Prometheus snmp-exporter listen on localhost only
|
- name: Make Prometheus snmp-exporter listen on localhost only
|
||||||
|
|
58
roles/prometheus/templates/ilo.rules.yml.j2
Normal file
58
roles/prometheus/templates/ilo.rules.yml.j2
Normal file
|
@@ -0,0 +1,58 @@
|
||||||
|
---
|
||||||
|
{{ ansible_managed | comment }}
|
||||||
|
|
||||||
|
{% macro raw(string) -%}
|
||||||
|
{{ "{{" }} {{ string }} {{ "}}" }}
|
||||||
|
{%- endmacro %}
|
||||||
|
|
||||||
|
groups:
|
||||||
|
|
||||||
|
- name: ilo.rules
|
||||||
|
rules:
|
||||||
|
|
||||||
|
- alert: IloBiosSelfTestDegraded
|
||||||
|
expr: cpqHeHWBiosCondition >= 3
|
||||||
|
for: 3m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: >-
|
||||||
|
Une erreur a été détectée lors du POST du serveur
|
||||||
|
|
||||||
|
- alert: IloTemperatureSensorDegraded
|
||||||
|
expr: cpqHeTemperatureCondition >= 3
|
||||||
|
for: 3m
|
||||||
jeltz marked this conversation as resolved
|
|||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: >-
|
||||||
|
Le capteur de température est dégradé
|
||||||
|
|
||||||
|
- alert: IloFanDegraded
|
||||||
|
expr: cpqHeFltTolFanCondition >= 3
|
||||||
|
for: 3m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: >-
|
||||||
|
Le ventilateur est dégradé
|
||||||
|
|
||||||
|
- alert: IloPowerSupplyDegraded
|
||||||
|
expr: cpqHeFltTolPowerSupplyStatus >= 2
|
||||||
|
for: 3m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: >-
|
||||||
|
L'alimentation est dégradée
|
||||||
|
|
||||||
|
- alert: IloOverrideSwitchState
|
||||||
|
expr: cpqSm2CntlriLOSecurityOverrideSwitchState == 2
|
||||||
|
for: 3m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: >-
|
||||||
|
Le switch de réinitialisation n'est pas à l'état d'origine
|
||||||
|
|
||||||
|
...
|
Loading…
Reference in a new issue
À quoi correspond `other` ? Ce n'est jamais une erreur ?
`other` = le module est absent, par exemple ; on a BEAUCOUP de `other`.
Ok