diff --git a/monitoring.yml b/monitoring.yml index bcf4ef2..c81934c 100755 --- a/monitoring.yml +++ b/monitoring.yml @@ -121,8 +121,9 @@ - prometheus-rives.adm.auro.re - prometheus-aurore.adm.auro.re - prometheus-ovh.adm.auro.re + - prometheus-federate.adm.auro.re roles: - - prometheus-federate + - prometheus_federate # Monitor all hosts diff --git a/roles/prometheus/tasks/main.yml.save b/roles/prometheus/tasks/main.yml.save new file mode 100644 index 0000000..57945ce --- /dev/null +++ b/roles/prometheus/tasks/main.yml.save @@ -0,0 +1,84 @@ +--- +- name: Install Prometheus + apt: + update_cache: true + name: + - prometheus + - prometheus-snmp-exporter + register: apt_result + retries: 3 + until: apt_result is succeeded + +- name: Configure Prometheus + template: + src: prometheus/prometheus.yml.j2 + dest: /etc/prometheus/prometheus.yml + mode: 0644 + notify: Restart Prometheus + +- name: Configure Prometheus alert rules + template: + src: "prometheus/{{ item }}.j2" + dest: "/etc/prometheus/{{ item }}" + mode: 0644 + notify: Restart Prometheus + loop: + - alert.rules.yml + - django.rules.yml + +- name: Make Prometheus snmp-exporter listen on localhost only + lineinfile: + path: /etc/default/prometheus-snmp-exporter + regexp: '^ARGS=' + line: "ARGS=\"--web.listen-address=127.0.0.1:9116\"" + notify: Restart prometheus-snmp-exporter + +# This file stores SNMP OIDs +- name: Configure Prometheus snmp-exporter + template: + src: "prometheus/snmp.yml.j2" + dest: "/etc/prometheus/snmp.yml" + mode: 0600 + owner: prometheus + notify: Restart prometheus-snmp-exporter + +# We don't need to restart Prometheus when updating nodes +- name: Configure Prometheus nodes + copy: + content: "{{ prometheus_targets | to_nice_json }}" + dest: /etc/prometheus/targets.json + mode: 0644 + +# We don't need to restart Prometheus when updating nodes +- name: Configure Prometheus Ubiquity Unifi SNMP devices + copy: + content: "{{ prometheus_unifi_snmp_targets | to_nice_json }}" + dest: 
/etc/prometheus/targets_unifi_snmp.json + mode: 0644 + when: prometheus_unifi_snmp_targets is defined + +- name: Configure Prometheus UPS SNMP devices + copy: + content: "{{ [{'targets': prometheus_ups_snmp_targets }] | to_nice_json }}\n" + dest: /etc/prometheus/targets_ups_snmp.json + mode: 0644 + when: prometheus_ups_snmp_targets is defined + +- name: Configure Prometheus docker monitoring + copy: + content: "{{ [{'targets': prometheus_docker_targets }] | to_nice_json }}\n" + dest: /etc/prometheus/targets_docker.json + mode: 0644 + when: prometheus_docker_targets is defined + +- name: Activate prometheus service + systemd: + name: prometheus + enabled: true + state: started + +- name: Indicate role in motd + template: + src: update-motd.d/05-service.j2 + dest: /etc/update-motd.d/05-prometheus + mode: 0755 diff --git a/roles/prometheus-federate/handlers/main.yml b/roles/prometheus_federate/handlers/main.yml similarity index 100% rename from roles/prometheus-federate/handlers/main.yml rename to roles/prometheus_federate/handlers/main.yml diff --git a/roles/prometheus-federate/tasks/main.yml b/roles/prometheus_federate/tasks/main.yml similarity index 100% rename from roles/prometheus-federate/tasks/main.yml rename to roles/prometheus_federate/tasks/main.yml diff --git a/roles/prometheus-federate/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus_federate/templates/prometheus/alert.rules.yml.j2 similarity index 65% rename from roles/prometheus-federate/templates/prometheus/alert.rules.yml.j2 rename to roles/prometheus_federate/templates/prometheus/alert.rules.yml.j2 index e2cb42c..0fd14f5 100644 --- a/roles/prometheus-federate/templates/prometheus/alert.rules.yml.j2 +++ b/roles/prometheus_federate/templates/prometheus/alert.rules.yml.j2 @@ -13,7 +13,7 @@ groups: labels: severity: critical annotations: - summary: "{{ $labels.instance }} est invisible depuis plus de 3 minutes !"
+ summary: "Federate : {{ $labels.exported_instance }} est invisible depuis plus de 3 minutes !" # Alert for out of memory - alert: OutOfMemory @@ -22,7 +22,7 @@ groups: labels: severity: warning annotations: - summary: "Mémoire libre de {{ $labels.instance }} à {{ humanize $value }}%." + summary: "Federate : Mémoire libre de {{ $labels.exported_instance }} à {{ humanize $value }}%." # Alert for out of disk space - alert: OutOfDiskSpace @@ -31,7 +31,7 @@ groups: labels: severity: warning annotations: - summary: "Espace libre de {{ $labels.mountpoint }} sur {{ $labels.instance }} à {{ humanize $value }}%." + summary: "Federate : Espace libre de {{ $labels.mountpoint }} sur {{ $labels.exported_instance }} à {{ humanize $value }}%." # Alert for out of inode space on disk - alert: OutOfInodes @@ -40,7 +40,7 @@ groups: labels: severity: warning annotations: - summary: "Presque plus d'inodes disponibles ({{ $value }}% restant) dans {{ $labels.mountpoint }} sur {{ $labels.instance }}." + summary: "Federate : Presque plus d'inodes disponibles ({{ $value }}% restant) dans {{ $labels.mountpoint }} sur {{ $labels.exported_instance }}." # Alert for high CPU usage - alert: CpuUsage @@ -49,7 +49,7 @@ groups: labels: severity: warning annotations: - summary: "CPU sur {{ $labels.instance }} à {{ humanize $value }}%." + summary: "Federate : CPU sur {{ $labels.exported_instance }} à {{ humanize $value }}%." # Check systemd unit (> buster) - alert: SystemdServiceFailed @@ -58,8 +58,8 @@ groups: labels: severity: warning annotations: - summary: "{{ $labels.name }} a échoué sur {{ $labels.instance }}" - + summary: "Federate : {{ $labels.name }} a échoué sur {{ $labels.exported_instance }}" + # Check UPS - alert: UpsOutputSourceChanged expr: upsOutputSource != 3 @@ -67,7 +67,7 @@ groups: labels: severity: warning annotations: - summary: "La source d'alimentation de {{ $labels.instance }} a changé !" + summary: "Federate : La source d'alimentation de {{ $labels.exported_instance }} a changé !"
- alert: UpsBatteryStatusWarning expr: upsBatteryStatus == 3 @@ -75,7 +75,7 @@ groups: labels: severity: warning annotations: - summary: "L'état de la batterie de {{ $labels.instance }} est faible !" + summary: "Federate : L'état de la batterie de {{ $labels.exported_instance }} est faible !" - alert: UpsBatteryStatusCritical expr: upsBatteryStatus == 4 @@ -83,7 +83,7 @@ groups: labels: severity: warning annotations: - summary: "L'état de la batterie de {{ $labels.instance }} est affaibli !" + summary: "Federate : L'état de la batterie de {{ $labels.exported_instance }} est affaibli !" - alert: UpsHighLoad expr: upsOutputPercentLoad > 70 @@ -91,7 +91,7 @@ groups: labels: severity: critical annotations: - summary: "La charge de {{ $labels.instance }} est de {{ $value }}% !" + summary: "Federate : La charge de {{ $labels.exported_instance }} est de {{ $value }}% !" - alert: UpsWrongInputVoltage expr: (upsInputVoltage < 210) or (upsInputVoltage > 250) @@ -99,7 +99,7 @@ groups: labels: severity: warning annotations: - summary: "La tension d'entrée de {{ $labels.instance }} est de {{ $value }}V." + summary: "Federate : La tension d'entrée de {{ $labels.exported_instance }} est de {{ $value }}V." - alert: UpsWrongOutputVoltage expr: (upsOutputVoltage < 220) or (upsOutputVoltage > 240) @@ -107,7 +107,7 @@ groups: labels: severity: warning annotations: - summary: "La tension de sortie de {{ $labels.instance }} est de {{ $value }}V." + summary: "Federate : La tension de sortie de {{ $labels.exported_instance }} est de {{ $value }}V." - alert: UpsTimeRemainingWarning expr: upsEstimatedMinutesRemaining < 15 @@ -115,7 +115,7 @@ groups: labels: severity: warning annotations: - summary: "L'autonomie restante sur {{ $labels.instance }} est de {{ $value }} min." + summary: "Federate : L'autonomie restante sur {{ $labels.exported_instance }} est de {{ $value }} min."
- alert: UpsTimeRemainingCritical expr: upsEstimatedMinutesRemaining < 5 @@ -123,7 +123,7 @@ groups: labels: severity: critical annotations: - summary: "L'autonomie restante sur {{ $labels.instance }} est de {{ $value }} min." + summary: "Federate : L'autonomie restante sur {{ $labels.exported_instance }} est de {{ $value }} min." {% endraw %} diff --git a/roles/prometheus-federate/templates/prometheus/django.rules.yml.j2 b/roles/prometheus_federate/templates/prometheus/django.rules.yml.j2 similarity index 100% rename from roles/prometheus-federate/templates/prometheus/django.rules.yml.j2 rename to roles/prometheus_federate/templates/prometheus/django.rules.yml.j2 diff --git a/roles/prometheus-federate/templates/prometheus/prometheus.yml.j2 b/roles/prometheus_federate/templates/prometheus/prometheus.yml.j2 similarity index 98% rename from roles/prometheus-federate/templates/prometheus/prometheus.yml.j2 rename to roles/prometheus_federate/templates/prometheus/prometheus.yml.j2 index 0d4c601..52e5a92 100644 --- a/roles/prometheus-federate/templates/prometheus/prometheus.yml.j2 +++ b/roles/prometheus_federate/templates/prometheus/prometheus.yml.j2 @@ -52,4 +52,5 @@ scrape_configs: - '{job="ups_snmp"}' - '{job="django"}' - '{job="docker"}' + - '{job="switch"}' diff --git a/roles/prometheus-federate/templates/prometheus/snmp.yml.j2 b/roles/prometheus_federate/templates/prometheus/snmp.yml.j2 similarity index 100% rename from roles/prometheus-federate/templates/prometheus/snmp.yml.j2 rename to roles/prometheus_federate/templates/prometheus/snmp.yml.j2 diff --git a/roles/prometheus-federate/templates/update-motd.d/05-service.j2 b/roles/prometheus_federate/templates/update-motd.d/05-service.j2 similarity index 100% rename from roles/prometheus-federate/templates/update-motd.d/05-service.j2 rename to roles/prometheus_federate/templates/update-motd.d/05-service.j2