Update alert rules of UPS
Some checks failed
continuous-integration/drone/push Build is failing

This commit is contained in:
pz2891 2021-01-23 19:01:27 +01:00
parent 705fe953ae
commit bac377f634
6 changed files with 26 additions and 86 deletions

9
hosts
View file

@ -345,6 +345,7 @@ dns-rives-backup.adm.auro.re
radius-rives-backup.adm.auro.re
routeur-rives-backup.adm.auro.re
ldap-replica-rives.adm.auro.re
prometheus-rives.adm.auro.re
[rives_unifi]
r3-4-4.borne.auro.re
@ -396,29 +397,31 @@ ovh_vm
[fleming:children]
fleming_pve
fleming_vm
#fleming_unifi
fleming_unifi
# everything at pacaterie
[pacaterie:children]
pacaterie_pve
pacaterie_vm
#pacaterie_unifi
pacaterie_unifi
# everything at edc
[edc:children]
edc_pve
edc_vm
edc_unifi
# everything at georgesand
[gs:children]
gs_pve
gs_vm
gs_unifi
# everything at Les Rives
[rives:children]
rives_pve
rives_vm
rives_unifi
###############################################################################
# Groups by type

View file

@ -61,6 +61,24 @@
roles:
- prometheus
# Prometheus instance for the Rives site: applies the prometheus role with the
# site's node, UniFi SNMP and UPS SNMP target lists.
- hosts: prometheus-rives.adm.auro.re
  vars:
    prometheus_alertmanager: "docker-ovh.adm.auro.re:9093"
    snmp_unifi_password: "{{ vault_snmp_unifi_password }}"

    # Prometheus targets.json
    prometheus_ups_snmp_targets:
      - ups-r3-1.ups.auro.re
    prometheus_targets:
      # Jinja filters bind tighter than `+`: without the parentheses only
      # groups['rives_vm'] would be sorted, not the merged list.
      - targets: |
          {{ (groups['rives_pve'] + groups['rives_vm']) | list | sort }}
    prometheus_unifi_snmp_targets:
      - targets: "{{ groups['rives_unifi'] | list | sort }}"
  roles:
    - prometheus
# Monitor all hosts
- hosts: all,!unifi,!ovh
roles:

View file

@ -61,7 +61,7 @@
content: "{{ [{'targets': prometheus_ups_snmp_targets }] | to_nice_json }}\n"
dest: /etc/prometheus/targets_ups_snmp.json
mode: 0644
when: prometheus_ups_snmp_targets is defined
- name: Activate prometheus service
systemd:

View file

@ -1,76 +0,0 @@
---
# Tasks of the prometheus role: install Prometheus and its SNMP exporter,
# render their configuration and target lists, then enable the service.

- name: Install Prometheus
  apt:
    update_cache: true
    name:
      - prometheus
      - prometheus-snmp-exporter
  register: apt_result
  # Re-run the installation up to 3 times until apt reports success.
  retries: 3
  until: apt_result is succeeded

- name: Configure Prometheus
  template:
    src: prometheus/prometheus.yml.j2
    dest: /etc/prometheus/prometheus.yml
    mode: "0644"
  notify: Restart Prometheus

# Each rules file is rendered from the template of the same name plus ".j2".
- name: Configure Prometheus alert rules
  template:
    src: "prometheus/{{ item }}.j2"
    dest: "/etc/prometheus/{{ item }}"
    mode: "0644"
  notify: Restart Prometheus
  loop:
    - alert.rules.yml
    - django.rules.yml

- name: Make Prometheus snmp-exporter listen on localhost only
  lineinfile:
    path: /etc/default/prometheus-snmp-exporter
    regexp: '^ARGS='
    line: 'ARGS="--web.listen-address=127.0.0.1:9116"'
  notify: Restart prometheus-snmp-exporter

# This file stores SNMP OIDs
- name: Configure Prometheus snmp-exporter
  template:
    src: "prometheus/snmp.yml.j2"
    dest: "/etc/prometheus/snmp.yml"
    # Readable by the prometheus user only.
    mode: "0600"
    owner: prometheus
  notify: Restart prometheus-snmp-exporter

# We don't need to restart Prometheus when updating nodes
- name: Configure Prometheus nodes
  copy:
    content: "{{ prometheus_targets | to_nice_json }}"
    dest: /etc/prometheus/targets.json
    mode: "0644"

# We don't need to restart Prometheus when updating nodes
- name: Configure Prometheus Ubiquity Unifi SNMP devices
  copy:
    content: "{{ prometheus_unifi_snmp_targets | to_nice_json }}"
    dest: /etc/prometheus/targets_unifi_snmp.json
    mode: "0644"

# The UPS list is a plain list of host names, so it is wrapped into the
# [{"targets": [...]}] structure here. Skipped on hosts that define no UPS
# targets.
- name: Configure Prometheus UPS SNMP devices
  copy:
    content: "{{ [{'targets': prometheus_ups_snmp_targets }] | to_nice_json }}\n"
    dest: /etc/prometheus/targets_ups_snmp.json
    mode: "0644"
  when: prometheus_ups_snmp_targets is defined

- name: Activate prometheus service
  systemd:
    name: prometheus
    enabled: true
    state: started

- name: Indicate role in motd
  template:
    src: update-motd.d/05-service.j2
    dest: /etc/update-motd.d/05-prometheus
    # Installed with the executable bit set.
    mode: "0755"

View file

@ -102,7 +102,7 @@ groups:
summary: "La tension d'entrée de {{ $labels.instance }} est de {{ $value }}V."
- alert: UpsWrongOutputVoltage
expr: (upsOutputVoltage < 225) or (upsOutputVoltage > 235)
expr: (upsOutputVoltage < 220) or (upsOutputVoltage > 240)
for: 5m
labels:
severity: warning

View file

@ -1,5 +0,0 @@
#!/bin/bash
# Run ssh-copy-id against every entry of the `hosts` file (read from the
# current directory) that contains "pacaterie.adm.auro.re".

# grep -F matches the domain literally; as a regex, the unescaped dots would
# match any character. The command substitution is deliberately unquoted so
# the matches are split into one word per host, while "$host" is quoted when
# it is passed on.
for host in $(grep -F 'pacaterie.adm.auro.re' hosts); do
  ssh-copy-id "$host"
done