From bac377f6348691b31783199dbc282f8969963f0f Mon Sep 17 00:00:00 2001 From: pz2891 Date: Sat, 23 Jan 2021 19:01:27 +0100 Subject: [PATCH] Update alert rules of UPS --- hosts | 9 ++- monitoring.yml | 18 +++++ roles/prometheus/tasks/main.yml | 2 +- roles/prometheus/tasks/main.yml.save | 76 ------------------- .../templates/prometheus/alert.rules.yml.j2 | 2 +- test.sh | 5 -- 6 files changed, 26 insertions(+), 86 deletions(-) delete mode 100644 roles/prometheus/tasks/main.yml.save delete mode 100755 test.sh diff --git a/hosts b/hosts index a06cac8..6639a21 100644 --- a/hosts +++ b/hosts @@ -345,6 +345,7 @@ dns-rives-backup.adm.auro.re radius-rives-backup.adm.auro.re routeur-rives-backup.adm.auro.re ldap-replica-rives.adm.auro.re +prometheus-rives.adm.auro.re [rives_unifi] r3-4-4.borne.auro.re @@ -396,29 +397,31 @@ ovh_vm [fleming:children] fleming_pve fleming_vm -#fleming_unifi +fleming_unifi # everything at pacaterie [pacaterie:children] pacaterie_pve pacaterie_vm -#pacaterie_unifi +pacaterie_unifi # everything at edc [edc:children] edc_pve edc_vm +edc_unifi # everything at georgesand [gs:children] gs_pve gs_vm +gs_unifi # everything at Les Rives [rives:children] rives_pve rives_vm - +rives_unifi ############################################################################### # Groups by type diff --git a/monitoring.yml b/monitoring.yml index 9bcc370..fc59738 100755 --- a/monitoring.yml +++ b/monitoring.yml @@ -61,6 +61,24 @@ roles: - prometheus +- hosts: prometheus-rives.adm.auro.re + vars: + prometheus_alertmanager: docker-ovh.adm.auro.re:9093 + snmp_unifi_password: "{{ vault_snmp_unifi_password }}" + + # Prometheus targets.json + prometheus_ups_snmp_targets: + - ups-r3-1.ups.auro.re + + prometheus_targets: + - targets: | + {{ groups['rives_pve'] + groups['rives_vm'] | list | sort }} + prometheus_unifi_snmp_targets: + - targets: "{{ groups['rives_unifi'] | list | sort }}" + roles: + - prometheus + + # Monitor all hosts - hosts: all,!unifi,!ovh roles: diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml index 0b5ad0e..38deaa3 100644 --- a/roles/prometheus/tasks/main.yml +++ b/roles/prometheus/tasks/main.yml @@ -61,7 +61,7 @@ content: "{{ [{'targets': prometheus_ups_snmp_targets }] | to_nice_json }}\n" dest: /etc/prometheus/targets_ups_snmp.json mode: 0644 - + when: prometheus_ups_snmp_targets is defined - name: Activate prometheus service systemd: diff --git a/roles/prometheus/tasks/main.yml.save b/roles/prometheus/tasks/main.yml.save deleted file mode 100644 index cca66e5..0000000 --- a/roles/prometheus/tasks/main.yml.save +++ /dev/null @@ -1,76 +0,0 @@ ---- -- name: Install Prometheus - apt: - update_cache: true - name: - - prometheus - - prometheus-snmp-exporter - register: apt_result - retries: 3 - until: apt_result is succeeded - -- name: Configure Prometheus - template: - src: prometheus/prometheus.yml.j2 - dest: /etc/prometheus/prometheus.yml - mode: 0644 - notify: Restart Prometheus - -- name: Configure Prometheus alert rules - template: - src: "prometheus/{{ item }}.j2" - dest: "/etc/prometheus/{{ item }}" - mode: 0644 - notify: Restart Prometheus - loop: - - alert.rules.yml - - django.rules.yml - -- name: Make Prometheus snmp-exporter listen on localhost only - lineinfile: - path: /etc/default/prometheus-snmp-exporter - regexp: '^ARGS=' - line: "ARGS=\"--web.listen-address=127.0.0.1:9116\"" - notify: Restart prometheus-snmp-exporter - -# This file store SNMP OIDs -- name: Configure Prometheus snmp-exporter - template: - src: "prometheus/snmp.yml.j2" - dest: "/etc/prometheus/snmp.yml" - mode: 0600 - owner: prometheus - notify: Restart prometheus-snmp-exporter - -# We don't need to restart Prometheus when updating nodes -- name: Configure Prometheus nodes - copy: - content: "{{ prometheus_targets | to_nice_json }}" - dest: /etc/prometheus/targets.json - mode: 0644 - -# We don't need to restart Prometheus when updating nodes -- name: Configure Prometheus Ubiquity Unifi SNMP devices - copy: - content: "{{ prometheus_unifi_snmp_targets | to_nice_json }}" - dest: /etc/prometheus/targets_unifi_snmp.json - mode: 0644 - -- name: Configure Prometheus UPS SNMP devices - copy: - content: "{{ [{'target | to_nice_json }}" - dest: /etc/prometheus/targets_ups_snmp.json - mode: 0644 - - -- name: Activate prometheus service - systemd: - name: prometheus - enabled: true - state: started - -- name: Indicate role in motd - template: - src: update-motd.d/05-service.j2 - dest: /etc/update-motd.d/05-prometheus - mode: 0755 diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2 index db99ab7..7ae1928 100644 --- a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 +++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2 @@ -102,7 +102,7 @@ groups: summary: "La tension d'entrée de {{ $labels.instance }} est de {{ $value }}V." - alert: UpsWrongOutputVoltage - expr: (upsOutputVoltage < 225) or (upsOutputVoltage > 235) + expr: (upsOutputVoltage < 220) or (upsOutputVoltage > 240) for: 5m labels: severity: warning diff --git a/test.sh b/test.sh deleted file mode 100755 index 3e77d04..0000000 --- a/test.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -for ip in `cat hosts|grep pacaterie.adm.auro.re`; do - ssh-copy-id $ip -done -