WIP: prometheus-ipmi-exporter #63
11 changed files with 164 additions and 11 deletions
33
hosts
33
hosts
|
@ -10,6 +10,8 @@
|
||||||
|
|
||||||
viviane.adm.auro.re
|
viviane.adm.auro.re
|
||||||
|
|
||||||
|
[aurore_server]
|
||||||
|
|
||||||
[aurore_pve]
|
[aurore_pve]
|
||||||
merlin.adm.auro.re
|
merlin.adm.auro.re
|
||||||
|
|
||||||
|
@ -41,21 +43,20 @@ litl.adm.auro.re
|
||||||
log.adm.auro.re
|
log.adm.auro.re
|
||||||
|
|
||||||
[aurore_testing_vm]
|
[aurore_testing_vm]
|
||||||
pendragon.adm.auro.re
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# OVH
|
# OVH
|
||||||
|
|
||||||
|
[ovh_server]
|
||||||
|
|
||||||
[ovh_pve]
|
[ovh_pve]
|
||||||
horus.adm.auro.re
|
horus.adm.auro.re
|
||||||
|
|
||||||
[ovh_container]
|
[ovh_container]
|
||||||
synapse.adm.auro.re
|
synapse.adm.auro.re
|
||||||
phabricator.adm.auro.re
|
wikijs.adm.auro.re
|
||||||
wiki.adm.auro.re
|
|
||||||
www.adm.auro.re
|
www.adm.auro.re
|
||||||
proxy-ovh.adm.auro.re
|
proxy-ovh.adm.auro.re
|
||||||
matrix-services.adm.auro.re
|
|
||||||
|
|
||||||
[ovh_vm]
|
[ovh_vm]
|
||||||
serge.adm.auro.re
|
serge.adm.auro.re
|
||||||
|
@ -192,6 +193,8 @@ fl-4-2.borne.auro.re
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Pacaterie
|
# Pacaterie
|
||||||
|
|
||||||
|
[pacaterie_server]
|
||||||
|
|
||||||
[pacaterie_pve]
|
[pacaterie_pve]
|
||||||
mordred.adm.auro.re
|
mordred.adm.auro.re
|
||||||
titan.adm.auro.re
|
titan.adm.auro.re
|
||||||
|
@ -270,6 +273,7 @@ ee-2-1.borne.auro.re
|
||||||
ee-2-2.borne.auro.re
|
ee-2-2.borne.auro.re
|
||||||
eo-0-1.borne.auro.re
|
eo-0-1.borne.auro.re
|
||||||
eo-2-1.borne.auro.re
|
eo-2-1.borne.auro.re
|
||||||
|
eo-3-1.borne.auro.re
|
||||||
ep-0-1.borne.auro.re
|
ep-0-1.borne.auro.re
|
||||||
ep-1-1.borne.auro.re
|
ep-1-1.borne.auro.re
|
||||||
ep-1-2.borne.auro.re
|
ep-1-2.borne.auro.re
|
||||||
|
@ -279,6 +283,8 @@ ep-1-3.borne.auro.re
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# George Sand
|
# George Sand
|
||||||
|
|
||||||
|
[gs_server]
|
||||||
|
|
||||||
[gs_pve]
|
[gs_pve]
|
||||||
lancelot.adm.auro.re
|
lancelot.adm.auro.re
|
||||||
odin.adm.auro.re
|
odin.adm.auro.re
|
||||||
|
@ -323,7 +329,6 @@ gd-1-2.borne.auro.re
|
||||||
gd-2-1.borne.auro.re
|
gd-2-1.borne.auro.re
|
||||||
gd-3-1.borne.auro.re
|
gd-3-1.borne.auro.re
|
||||||
gd-4-1.borne.auro.re
|
gd-4-1.borne.auro.re
|
||||||
gd-4-2.borne.auro.re
|
|
||||||
gd-5-1.borne.auro.re
|
gd-5-1.borne.auro.re
|
||||||
gd-5-2.borne.auro.re
|
gd-5-2.borne.auro.re
|
||||||
gd-garage-1.borne.auro.re
|
gd-garage-1.borne.auro.re
|
||||||
|
@ -340,7 +345,6 @@ gf-0-1.borne.auro.re
|
||||||
gf-1-1.borne.auro.re
|
gf-1-1.borne.auro.re
|
||||||
gf-2-1.borne.auro.re
|
gf-2-1.borne.auro.re
|
||||||
gf-3-1.borne.auro.re
|
gf-3-1.borne.auro.re
|
||||||
gf-3-2.borne.auro.re
|
|
||||||
gf-4-1.borne.auro.re
|
gf-4-1.borne.auro.re
|
||||||
gf-5-1.borne.auro.re
|
gf-5-1.borne.auro.re
|
||||||
gg-5-1.borne.auro.re
|
gg-5-1.borne.auro.re
|
||||||
|
@ -349,6 +353,9 @@ gh-1-2.borne.auro.re
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Les Rives
|
# Les Rives
|
||||||
|
|
||||||
|
[rives_server]
|
||||||
|
|
||||||
[rives_pve]
|
[rives_pve]
|
||||||
thor.adm.auro.re
|
thor.adm.auro.re
|
||||||
loki.adm.auro.re
|
loki.adm.auro.re
|
||||||
|
@ -367,6 +374,8 @@ radius-rives.adm.auro.re
|
||||||
routeur-rives.adm.auro.re
|
routeur-rives.adm.auro.re
|
||||||
|
|
||||||
[rives_unifi]
|
[rives_unifi]
|
||||||
|
r1-0-1.borne.auro.re
|
||||||
|
r1-0-2.borne.auro.re
|
||||||
r1-1-1.borne.auro.re
|
r1-1-1.borne.auro.re
|
||||||
r1-1-2.borne.auro.re
|
r1-1-2.borne.auro.re
|
||||||
r1-1-3.borne.auro.re
|
r1-1-3.borne.auro.re
|
||||||
|
@ -383,6 +392,7 @@ r1-3-3.borne.auro.re
|
||||||
r1-3-4.borne.auro.re
|
r1-3-4.borne.auro.re
|
||||||
r1-3-5.borne.auro.re
|
r1-3-5.borne.auro.re
|
||||||
r1-3-6.borne.auro.re
|
r1-3-6.borne.auro.re
|
||||||
|
r2-0-1.borne.auro.re
|
||||||
r2-1-1.borne.auro.re
|
r2-1-1.borne.auro.re
|
||||||
r2-1-2.borne.auro.re
|
r2-1-2.borne.auro.re
|
||||||
r2-1-3.borne.auro.re
|
r2-1-3.borne.auro.re
|
||||||
|
@ -430,11 +440,14 @@ r3-4-8.borne.auro.re
|
||||||
|
|
||||||
# -aurore services
|
# -aurore services
|
||||||
[aurore:children]
|
[aurore:children]
|
||||||
|
aurore_server
|
||||||
|
aurore_pve
|
||||||
aurore_vm
|
aurore_vm
|
||||||
|
|
||||||
|
|
||||||
# everything at ovh
|
# everything at ovh
|
||||||
[ovh:children]
|
[ovh:children]
|
||||||
|
ovh_server
|
||||||
ovh_pve
|
ovh_pve
|
||||||
ovh_container
|
ovh_container
|
||||||
ovh_vm
|
ovh_vm
|
||||||
|
@ -448,6 +461,7 @@ fleming_unifi
|
||||||
|
|
||||||
# everything at pacaterie
|
# everything at pacaterie
|
||||||
[pacaterie:children]
|
[pacaterie:children]
|
||||||
|
pacaterie_server
|
||||||
pacaterie_pve
|
pacaterie_pve
|
||||||
pacaterie_vm
|
pacaterie_vm
|
||||||
pacaterie_unifi
|
pacaterie_unifi
|
||||||
|
@ -461,12 +475,14 @@ edc_unifi
|
||||||
|
|
||||||
# everything at georgesand
|
# everything at georgesand
|
||||||
[gs:children]
|
[gs:children]
|
||||||
|
gs_server
|
||||||
gs_pve
|
gs_pve
|
||||||
gs_vm
|
gs_vm
|
||||||
gs_unifi
|
gs_unifi
|
||||||
|
|
||||||
# everything at Les Rives
|
# everything at Les Rives
|
||||||
[rives:children]
|
[rives:children]
|
||||||
|
rives_server
|
||||||
rives_pve
|
rives_pve
|
||||||
rives_vm
|
rives_vm
|
||||||
rives_unifi
|
rives_unifi
|
||||||
|
@ -489,6 +505,11 @@ rives_vm
|
||||||
|
|
||||||
# every server
|
# every server
|
||||||
[server:children]
|
[server:children]
|
||||||
|
ovh_server
|
||||||
|
aurore_server
|
||||||
|
pacaterie_server
|
||||||
|
gs_server
|
||||||
|
rives_server
|
||||||
fleming_server
|
fleming_server
|
||||||
edc_server
|
edc_server
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,9 @@
|
||||||
{{ groups['fleming_pve'] + groups['fleming_vm'] | list | sort }}
|
{{ groups['fleming_pve'] + groups['fleming_vm'] | list | sort }}
|
||||||
prometheus_unifi_snmp_targets:
|
prometheus_unifi_snmp_targets:
|
||||||
- targets: "{{ groups['fleming_unifi'] | list | sort }}"
|
- targets: "{{ groups['fleming_unifi'] | list | sort }}"
|
||||||
|
prometheus_ipmi_targets:
|
||||||
|
- targets: |
|
||||||
|
{{ groups['fleming_pve'] + groups['fleming_server'] | list | sort }}
|
||||||
roles:
|
roles:
|
||||||
- prometheus
|
- prometheus
|
||||||
|
|
||||||
|
@ -34,6 +37,9 @@
|
||||||
prometheus_ups_snmp_targets:
|
prometheus_ups_snmp_targets:
|
||||||
- ups-pn-1.ups.auro.re
|
- ups-pn-1.ups.auro.re
|
||||||
- ups-ps-1.ups.auro.re
|
- ups-ps-1.ups.auro.re
|
||||||
|
prometheus_ipmi_targets:
|
||||||
|
- targets: |
|
||||||
|
{{ groups['pacaterie_pve'] + groups['pacaterie_server'] | list | sort }}
|
||||||
roles:
|
roles:
|
||||||
- prometheus
|
- prometheus
|
||||||
|
|
||||||
|
@ -54,6 +60,9 @@
|
||||||
prometheus_targets:
|
prometheus_targets:
|
||||||
- targets: |
|
- targets: |
|
||||||
{{ groups['edc_pve'] + groups['edc_vm'] + groups['edc_server'] | list | sort }}
|
{{ groups['edc_pve'] + groups['edc_vm'] + groups['edc_server'] | list | sort }}
|
||||||
|
prometheus_ipmi_targets:
|
||||||
|
- targets: |
|
||||||
|
{{ groups['edc_pve'] + groups['edc_server'] | list | sort }}
|
||||||
prometheus_unifi_snmp_targets:
|
prometheus_unifi_snmp_targets:
|
||||||
- targets: "{{ groups['edc_unifi'] | list | sort }}"
|
- targets: "{{ groups['edc_unifi'] | list | sort }}"
|
||||||
roles:
|
roles:
|
||||||
|
@ -77,6 +86,9 @@
|
||||||
- ups-gk-1.ups.auro.re
|
- ups-gk-1.ups.auro.re
|
||||||
prometheus_pdu_snmp_targets:
|
prometheus_pdu_snmp_targets:
|
||||||
- pdu-ga-1.ups.auro.re
|
- pdu-ga-1.ups.auro.re
|
||||||
|
prometheus_ipmi_targets:
|
||||||
|
- targets: |
|
||||||
|
{{ groups['gs_pve'] + groups['gs_server'] | list | sort }}
|
||||||
roles:
|
roles:
|
||||||
- prometheus
|
- prometheus
|
||||||
|
|
||||||
|
@ -98,6 +110,9 @@
|
||||||
{{ groups['rives_pve'] + groups['rives_vm'] | list | sort }}
|
{{ groups['rives_pve'] + groups['rives_vm'] | list | sort }}
|
||||||
prometheus_unifi_snmp_targets:
|
prometheus_unifi_snmp_targets:
|
||||||
- targets: "{{ groups['rives_unifi'] | list | sort }}"
|
- targets: "{{ groups['rives_unifi'] | list | sort }}"
|
||||||
|
prometheus_ipmi_targets:
|
||||||
|
- targets: |
|
||||||
|
{{ groups['rives_pve'] + groups['rives_server'] | list | sort }}
|
||||||
roles:
|
roles:
|
||||||
- prometheus
|
- prometheus
|
||||||
|
|
||||||
|
@ -113,6 +128,9 @@
|
||||||
prometheus_targets:
|
prometheus_targets:
|
||||||
- targets: |
|
- targets: |
|
||||||
{{ groups['aurore_pve'] + groups['aurore_vm'] | list | sort }}
|
{{ groups['aurore_pve'] + groups['aurore_vm'] | list | sort }}
|
||||||
|
prometheus_ipmi_targets:
|
||||||
|
- targets: |
|
||||||
|
{{ groups['aurore_pve'] + groups['aurore_server'] | list | sort }}
|
||||||
prometheus_postgres_targets:
|
prometheus_postgres_targets:
|
||||||
- targets: |
|
- targets: |
|
||||||
{{ groups['bdd'] + groups['radius'] | list | sort }}
|
{{ groups['bdd'] + groups['radius'] | list | sort }}
|
||||||
|
@ -146,7 +164,7 @@
|
||||||
# Prometheus targets.json
|
# Prometheus targets.json
|
||||||
prometheus_targets:
|
prometheus_targets:
|
||||||
- targets: |
|
- targets: |
|
||||||
{{ groups['ovh_pve'] + groups['ovh_vm'] | list | sort }}
|
{{ groups['ovh_pve'] + groups['ovh_vm'] + groups['ovh_container'] | list | sort }}
|
||||||
prometheus_postgres_targets:
|
prometheus_postgres_targets:
|
||||||
- targets:
|
- targets:
|
||||||
- bdd-ovh.adm.auro.re
|
- bdd-ovh.adm.auro.re
|
||||||
|
@ -180,8 +198,13 @@
|
||||||
roles:
|
roles:
|
||||||
- prometheus_postgres
|
- prometheus_postgres
|
||||||
|
|
||||||
|
# IPMI Exporters
|
||||||
|
- hosts: server,pve
|
||||||
|
roles:
|
||||||
|
- prometheus_ipmi
|
||||||
|
|
||||||
|
|
||||||
# Monitor all hosts
|
# Monitor all hosts
|
||||||
- hosts: all,!edc_unifi,!fleming_unifi,!pacaterie_unifi,!gs_unifi,!rives_unifi,!aurore_testing_vm,!ovh_container
|
- hosts: all,!edc_unifi,!fleming_unifi,!pacaterie_unifi,!gs_unifi,!rives_unifi,!aurore_testing_vm
|
||||||
roles:
|
roles:
|
||||||
- prometheus_node
|
- prometheus_node
|
||||||
|
|
|
@ -113,6 +113,13 @@
|
||||||
mode: 0644
|
mode: 0644
|
||||||
when: prometheus_pdu_snmp_targets is defined
|
when: prometheus_pdu_snmp_targets is defined
|
||||||
|
|
||||||
|
- name: Configure Prometheus ipmi targets monitoring
|
||||||
|
copy:
|
||||||
|
content: "{{ prometheus_ipmi_targets | to_nice_json }}\n"
|
||||||
|
dest: /etc/prometheus/targets_ipmi.json
|
||||||
|
mode: 0644
|
||||||
|
when: prometheus_ipmi_targets is defined
|
||||||
|
|
||||||
- name: Activate prometheus service
|
- name: Activate prometheus service
|
||||||
systemd:
|
systemd:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
|
|
@ -112,7 +112,7 @@ groups:
|
||||||
rate(pg_stat_database_xact_rollback{datname!~"template.*"}[3m]) /
|
rate(pg_stat_database_xact_rollback{datname!~"template.*"}[3m]) /
|
||||||
rate(pg_stat_database_xact_commit{datname!~"template.*"}[3m])
|
rate(pg_stat_database_xact_commit{datname!~"template.*"}[3m])
|
||||||
) * 100
|
) * 100
|
||||||
> 7
|
> 20
|
||||||
for: 0m
|
for: 0m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
|
|
@ -140,4 +140,22 @@ scrape_configs:
|
||||||
- target_label: __address__
|
- target_label: __address__
|
||||||
replacement: 127.0.0.1:9116
|
replacement: 127.0.0.1:9116
|
||||||
|
|
||||||
|
- job_name: ipmi
|
||||||
|
file_sd_configs:
|
||||||
|
- files:
|
||||||
|
- '/etc/prometheus/targets_ipmi.json'
|
||||||
|
metrics_path: /metrics
|
||||||
|
params:
|
||||||
|
module: [default]
|
||||||
|
relabel_configs:
|
||||||
|
# Do not put :9290 in instance name, rather here
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: __param_target
|
||||||
|
- source_labels: [__param_target]
|
||||||
|
target_label: instance
|
||||||
|
- source_labels: [__param_target]
|
||||||
|
target_label: __address__
|
||||||
|
replacement: '$1:9290'
|
||||||
|
|
||||||
|
|
||||||
...
|
...
|
||||||
|
|
|
@ -11,7 +11,7 @@ groups:
|
||||||
rules:
|
rules:
|
||||||
|
|
||||||
- alert: InstanceDown
|
- alert: InstanceDown
|
||||||
expr: up{instance!~".*.borne.auro.re$"} == 0
|
expr: up{instance!~".*.borne.auro.re$", job="servers"} == 0
|
||||||
for: 3m
|
for: 3m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
|
@ -50,7 +50,7 @@ groups:
|
||||||
node_memory_SwapFree_bytes
|
node_memory_SwapFree_bytes
|
||||||
/ node_memory_SwapTotal_bytes
|
/ node_memory_SwapTotal_bytes
|
||||||
)
|
)
|
||||||
) * 100 >= 20
|
) * 100 >= 50
|
||||||
for: 3m
|
for: 3m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
|
|
@ -39,4 +39,5 @@ scrape_configs:
|
||||||
- '{job="apc_pdu_snmp"}'
|
- '{job="apc_pdu_snmp"}'
|
||||||
- '{job="docker"}'
|
- '{job="docker"}'
|
||||||
- '{job="switch_snmp"}'
|
- '{job="switch_snmp"}'
|
||||||
|
- '{job="ipmi"}'
|
||||||
...
|
...
|
||||||
|
|
3
roles/prometheus_ipmi/files/ipmi_conf/ipmi.sh
Executable file
3
roles/prometheus_ipmi/files/ipmi_conf/ipmi.sh
Executable file
|
@ -0,0 +1,3 @@
|
||||||
|
#!/bin/sh
# Generic sudo wrapper for the FreeIPMI command-line tools.
#
# This script is meant to be reached through a symlink named after the tool
# to run (e.g. ipmi-sensors). It looks at the name it was invoked as and
# runs the binary of the same name from /usr/sbin through sudo, forwarding
# every argument unchanged.
#
# "$0" and the command substitution are quoted so an invocation path that
# contains whitespace or glob characters cannot be split or expanded, and
# exec replaces this shell so sudo's exit status and signals reach the
# caller directly.
exec sudo "/usr/sbin/$(basename "$0")" "$@"
|
||||||
|
|
9
roles/prometheus_ipmi/files/prometheus
Normal file
9
roles/prometheus_ipmi/files/prometheus
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
# Allow the prometheus user to run the FreeIPMI tools used by the IPMI
# collector through sudo without being asked for a password.

Cmnd_Alias PROMETHEUS_IPMI = /usr/sbin/ipmimonitoring, \
                             /usr/sbin/ipmi-sensors, \
                             /usr/sbin/ipmi-dcmi, \
                             /usr/sbin/ipmi-raw, \
                             /usr/sbin/bmc-info, \
                             /usr/sbin/ipmi-chassis, \
                             /usr/sbin/ipmi-sel

prometheus ALL = NOPASSWD: PROMETHEUS_IPMI
|
6
roles/prometheus_ipmi/handlers/main.yml
Normal file
6
roles/prometheus_ipmi/handlers/main.yml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
---
# Handlers for the prometheus_ipmi role.

# Restart the exporter after one of the role's tasks reports a change.
# The systemd module is used (rather than the generic service module)
# because daemon_reload is a documented option of systemd only, and the
# role's tasks already manage this unit through systemd.
- name: Restart prometheus-ipmi-exporter
  systemd:
    name: prometheus-ipmi-exporter
    state: restarted
    daemon_reload: true
|
65
roles/prometheus_ipmi/tasks/main.yml
Normal file
65
roles/prometheus_ipmi/tasks/main.yml
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
---
# Install prometheus-ipmi-exporter and let it call the FreeIPMI tools
# through sudo wrappers placed in /etc/prometheus/ipmi_conf.

# The install is retried up to three times when apt fails.
- name: Install Prometheus ipmi-exporter
  apt:
    update_cache: true
    name: prometheus-ipmi-exporter
  register: apt_result
  retries: 3
  until: apt_result is succeeded

# "|-" keeps the value free of a trailing newline, so lineinfile never
# receives a line that already ends with a line separator.
# NOTE(review): this role does not deploy /etc/prometheus/ipmi_local.yml;
# confirm that the package (or another role) provides it.
# NOTE(review): the single quotes around the freeipmi path may reach the
# exporter literally, depending on how the unit expands ARGS -- verify.
- name: Make Prometheus ipmi-exporter scrap on localhost only
  lineinfile:
    path: /etc/default/prometheus-ipmi-exporter
    regexp: '^ARGS='
    line: |-
      ARGS="--config.file=/etc/prometheus/ipmi_local.yml --freeipmi.path='/etc/prometheus/ipmi_conf/'"
  notify: Restart prometheus-ipmi-exporter

# The drop-in is checked with visudo before it is moved into place, so a
# syntax error can never end up in /etc/sudoers.d.
- name: Define prometheus to be sudo for some command
  copy:
    src: files/prometheus
    dest: /etc/sudoers.d/prometheus
    owner: root
    group: root
    mode: u=r,g=r,o=
    validate: /usr/sbin/visudo -cf %s
  notify: Restart prometheus-ipmi-exporter

# Copies the ipmi_conf directory (containing the ipmi.sh wrapper) into
# /etc/prometheus.
- name: Override commands launchs by prometheus for ipmi
  copy:
    src: files/ipmi_conf
    dest: /etc/prometheus
    owner: prometheus
    group: prometheus
    mode: u=rx,g=r,o=
  notify: Restart prometheus-ipmi-exporter

# One symlink per FreeIPMI tool, all pointing at the ipmi.sh wrapper.
# The mode matches the copy task above: the file module may apply it to
# the link target (ipmi.sh), and a different value would make the two
# tasks undo each other and notify a restart on every run.
- name: Symlink FreeIPMI command names to the sudo wrapper
  file:
    src: '/etc/prometheus/ipmi_conf/ipmi.sh'
    dest: '/etc/prometheus/ipmi_conf/{{ item }}'
    state: link
    force: true
    owner: prometheus
    group: prometheus
    mode: u=rx,g=r,o=
  loop:
    - bmc-info
    - ipmi-chassis
    - ipmi-dcmi
    - ipmimonitoring
    - ipmi-raw
    - ipmi-sensors
    - ipmi-sel
  notify: Restart prometheus-ipmi-exporter

# Restarts are left to the "Restart prometheus-ipmi-exporter" handler,
# which runs only when one of the tasks above changed something; an
# unconditional restart here would report a change on every run.
- name: Activate prometheus-ipmi-exporter service
  systemd:
    name: prometheus-ipmi-exporter
    enabled: true
    state: started
|
Loading…
Reference in a new issue