Set up SNMP config for Prometheus to monitor Aurore's PDU
Some checks failed
continuous-integration/drone/push Build is failing

This commit is contained in:
pz2891 2021-01-21 21:26:40 +01:00
parent 078d141236
commit 40d3c22276
6 changed files with 263 additions and 0 deletions

View file

@ -34,6 +34,9 @@
snmp_unifi_password: "{{ vault_snmp_unifi_password }}" snmp_unifi_password: "{{ vault_snmp_unifi_password }}"
# Prometheus targets.json # Prometheus targets.json
prometheus_ups_snmp_targets:
- ups-ec-1.ups.auro.re
prometheus_targets: prometheus_targets:
- targets: | - targets: |
{{ groups['edc_pve'] + groups['edc_vm'] | list | sort }} {{ groups['edc_pve'] + groups['edc_vm'] | list | sort }}

View file

@ -0,0 +1,89 @@
---
# Keep this list limited to small tools that nobody can live without.
- name: Install basic tools
  apt:
    # Refresh the package index before installing.
    update_cache: true
    name:
      - sudo
      - molly-guard  # prevent reboot
      - ntp  # network time sync
      - apt  # better than apt-get
      - nano  # for vulcain
      - vim  # better than nano
      - emacs-nox  # for maman
      - htop  # better than top
      - zsh  # to be able to ssh @erdnaxe
      - fish  # to motivate @edpibu
      - oidentd  # postgresql identification
      - aptitude  # nice to have for Ansible
      - acl  # advanced ACL
      - iotop  # monitor i/o
      - tree  # create a graphical tree of files
      - bash-completion  # because bash
      - git  # code versioning
      - less  # i like cats
      - screen  # Vulcain asked for this
      - lsb-release
  # Retry up to 3 times until the apt call reports success.
  register: apt_result
  retries: 3
  until: apt_result is succeeded
  # Only run on Debian-family hosts.
  when: ansible_os_family == "Debian"
# Pimp my server: copy each listed script into /etc/update-motd.d/.
- name: Customize motd
  copy:
    src: "update-motd.d/{{ item }}"
    dest: "/etc/update-motd.d/{{ item }}"
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0755"
  loop:
    - 00-logo
    - 10-uname
# Delete the static /etc/motd file.
- name: Remove Debian warranty motd
  file:
    state: absent
    path: /etc/motd
# Render /etc/apt/sources.list from a template, only on Debian Stretch.
- name: Configure APT mirrors
  when:
    - ansible_distribution == 'Debian'
    - ansible_distribution_release == 'stretch'
  template:
    src: apt/sources.list.j2
    dest: /etc/apt/sources.list
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"
# Patriotism: make sure the fr_FR.UTF-8 locale is generated.
- name: Ensure French UTF-8 locale exists
  locale_gen:
    state: present
    name: fr_FR.UTF-8
# Fix LC_CTYPE="C" by answering the locales debconf question with
# fr_FR.UTF-8, then notify the "Reconfigure locales" handler.
- name: Select default locale
  debconf:
    name: locales
    question: locales/default_environment_locale
    vtype: select
    value: fr_FR.UTF-8
  notify: Reconfigure locales
# APT-List Changes: send email with changelog.
# Named so the include is identifiable in play output like every other task.
- name: Include apt-listchanges tasks
  include_tasks: apt-listchanges.yml
# User skeleton: copy each "skel/dot_<name>" file to "/etc/skel/.<name>".
- name: Configure user skeleton
  copy:
    src: "skel/dot_{{ item }}"
    dest: "/etc/skel/.{{ item }}"
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"
  loop:
    - zshrc
    - zshrc.local
# Render /etc/resolv.conf from the resolv.conf template.
- name: Configure resolvconf
  template:
    src: resolv.conf
    dest: /etc/resolv.conf
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"

View file

@ -56,6 +56,13 @@
dest: /etc/prometheus/targets_unifi_snmp.json dest: /etc/prometheus/targets_unifi_snmp.json
mode: 0644 mode: 0644
# Write the UPS SNMP target list as a one-element JSON array of
# {"targets": [...]} to the file read by the ups_snmp scrape job.
- name: Configure Prometheus UPS SNMP devices
  copy:
    content: "{{ [{'targets': prometheus_ups_snmp_targets }] | to_nice_json }}\n"
    dest: /etc/prometheus/targets_ups_snmp.json
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"
- name: Activate prometheus service - name: Activate prometheus service
systemd: systemd:
name: prometheus name: prometheus

View file

@ -0,0 +1,76 @@
---
# Install Prometheus and its SNMP exporter; retry up to 3 times until the
# apt call reports success.
- name: Install Prometheus
  apt:
    name:
      - prometheus
      - prometheus-snmp-exporter
    update_cache: true
  retries: 3
  register: apt_result
  until: apt_result is succeeded
# Render the main Prometheus configuration and notify the
# "Restart Prometheus" handler when it changes.
- name: Configure Prometheus
  template:
    src: prometheus/prometheus.yml.j2
    dest: /etc/prometheus/prometheus.yml
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"
  notify: Restart Prometheus
# Render each listed rules file from "prometheus/<name>.j2" and notify the
# "Restart Prometheus" handler when one changes.
- name: Configure Prometheus alert rules
  template:
    src: "prometheus/{{ item }}.j2"
    dest: "/etc/prometheus/{{ item }}"
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"
  notify: Restart Prometheus
  loop:
    - alert.rules.yml
    - django.rules.yml
# Replace the ARGS= line of the exporter defaults file so that it binds to
# 127.0.0.1:9116, then notify the exporter restart handler.
- name: Make Prometheus snmp-exporter listen on localhost only
  lineinfile:
    path: /etc/default/prometheus-snmp-exporter
    regexp: "^ARGS="
    # Single quotes keep the embedded double quotes literal without escapes.
    line: 'ARGS="--web.listen-address=127.0.0.1:9116"'
  notify: Restart prometheus-snmp-exporter
# This file stores the SNMP OIDs; it is owned by prometheus and readable by
# its owner only.
- name: Configure Prometheus snmp-exporter
  template:
    src: "prometheus/snmp.yml.j2"
    dest: "/etc/prometheus/snmp.yml"
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0600"
    owner: prometheus
  notify: Restart prometheus-snmp-exporter
# We don't need to restart Prometheus when updating nodes, so this task
# notifies no handler.
- name: Configure Prometheus nodes
  copy:
    content: "{{ prometheus_targets | to_nice_json }}"
    dest: /etc/prometheus/targets.json
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"
# We don't need to restart Prometheus when updating nodes, so this task
# notifies no handler.
- name: Configure Prometheus Ubiquity Unifi SNMP devices
  copy:
    content: "{{ prometheus_unifi_snmp_targets | to_nice_json }}"
    dest: /etc/prometheus/targets_unifi_snmp.json
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"
# Write the UPS SNMP target list as a one-element JSON array of
# {"targets": [...]} to the file read by the ups_snmp scrape job.
- name: Configure Prometheus UPS SNMP devices
  copy:
    # The expression was truncated mid-string and could not be templated;
    # it is restored to wrap prometheus_ups_snmp_targets in a targets entry.
    content: "{{ [{'targets': prometheus_ups_snmp_targets }] | to_nice_json }}\n"
    dest: /etc/prometheus/targets_ups_snmp.json
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0644"
# Make sure the prometheus unit is running now and enabled.
- name: Activate prometheus service
  systemd:
    name: prometheus
    state: started
    enabled: true
# Render the service MOTD template to /etc/update-motd.d/05-prometheus.
- name: Indicate role in motd
  template:
    src: update-motd.d/05-service.j2
    dest: /etc/update-motd.d/05-prometheus
    # Quoted so no YAML parser can reinterpret the octal mode as an integer.
    mode: "0755"

View file

@ -65,3 +65,19 @@ scrape_configs:
scheme: https scheme: https
static_configs: static_configs:
- targets: [] - targets: []
# UPS scrape job: targets come from a file_sd JSON file and are queried
# on /snmp with the "eatonups" module.
- job_name: ups_snmp
  metrics_path: /snmp
  params:
    module:
      - eatonups
  file_sd_configs:
    - files:
        - /etc/prometheus/targets_ups_snmp.json
  # Rule order matters: each rule reads labels set by the previous one.
  relabel_configs:
    # Pass the discovered address as the "target" query parameter.
    - source_labels:
        - __address__
      target_label: __param_target
    # Keep that same value as the "instance" label.
    - source_labels:
        - __param_target
      target_label: instance
    # Send the actual scrape request to 127.0.0.1:9116.
    - target_label: __address__
      replacement: '127.0.0.1:9116'

View file

@ -6,6 +6,78 @@
# - Optimiser les règles pour les bornes Unifi, # - Optimiser les règles pour les bornes Unifi,
# on pourrait indexer avec les SSID # on pourrait indexer avec les SSID
# snmp-exporter module "eatonups": SNMP version and community, the OIDs to
# fetch (get) or walk, and the metric definitions derived from them.
eatonups:
  version: 1
  auth:
    community: public
  get:
    - 1.3.6.1.2.1.1.3.0
  walk:
    - 1.3.6.1.2.1.33.1.2
    - 1.3.6.1.2.1.33.1.3
    - 1.3.6.1.2.1.33.1.4
    - 1.3.6.1.4.1.534.1.6
  metrics:
    - name: sysUpTime
      oid: 1.3.6.1.2.1.1.3
      type: gauge
      help: >-
        The time (in hundredths of a second) since the network management
        portion of the system was last re-initialized. - 1.3.6.1.2.1.1.3
    - name: upsBatteryStatus
      oid: 1.3.6.1.2.1.33.1.2.1
      type: gauge
      help: >-
        The indication of the capacity remaining in the UPS system's
        batteries - 1.3.6.1.2.1.33.1.2.1
    - name: upsEstimatedMinutesRemaining
      oid: 1.3.6.1.2.1.33.1.2.3
      type: gauge
      help: >-
        An estimate of the time to battery charge depletion under the
        present load conditions if the utility power is off and remains
        off, or if it were to be lost and remain off. -
        1.3.6.1.2.1.33.1.2.3
    - name: upsInputVoltage
      oid: 1.3.6.1.2.1.33.1.3.3.1.3
      type: gauge
      help: >-
        The magnitude of the present input voltage. -
        1.3.6.1.2.1.33.1.3.3.1.3
      indexes:
        - labelname: upsInputLineIndex
          type: gauge
    - name: upsOutputSource
      oid: 1.3.6.1.2.1.33.1.4.1
      type: gauge
      help: >-
        The present source of output power - 1.3.6.1.2.1.33.1.4.1
    - name: upsOutputVoltage
      oid: 1.3.6.1.2.1.33.1.4.4.1.2
      type: gauge
      help: >-
        The present output voltage. - 1.3.6.1.2.1.33.1.4.4.1.2
      indexes:
        - labelname: upsOutputLineIndex
          type: gauge
    - name: upsOutputPower
      oid: 1.3.6.1.2.1.33.1.4.4.1.4
      type: gauge
      help: >-
        The present output true power. - 1.3.6.1.2.1.33.1.4.4.1.4
      indexes:
        - labelname: upsOutputLineIndex
          type: gauge
    - name: upsOutputPercentLoad
      oid: 1.3.6.1.2.1.33.1.4.4.1.5
      type: gauge
      help: >-
        The percentage of the UPS power capacity presently being used on
        this output line, i.e., the greater of the percent load of true
        power capacity and the percent load of VA. -
        1.3.6.1.2.1.33.1.4.4.1.5
      indexes:
        - labelname: upsOutputLineIndex
          type: gauge
    - name: xupsEnvRemoteTemp
      oid: 1.3.6.1.4.1.534.1.6.5
      type: gauge
      help: >-
        The reading of an EMP's temperature sensor. - 1.3.6.1.4.1.534.1.6.5
    - name: xupsEnvRemoteHumidity
      oid: 1.3.6.1.4.1.534.1.6.6
      type: gauge
      help: >-
        The reading of an EMP's humidity sensor. - 1.3.6.1.4.1.534.1.6.6
procurve_switch: procurve_switch:
walk: walk:
- 1.3.6.1.2.1.31.1.1.1.10 - 1.3.6.1.2.1.31.1.1.1.10