WIP: prometheus-ipmi-exporter #63
11 changed files with 164 additions and 11 deletions
33
hosts
33
hosts
|
@ -10,6 +10,8 @@
|
|||
|
||||
viviane.adm.auro.re
|
||||
|
||||
[aurore_server]
|
||||
|
||||
[aurore_pve]
|
||||
merlin.adm.auro.re
|
||||
|
||||
|
@ -41,21 +43,20 @@ litl.adm.auro.re
|
|||
log.adm.auro.re
|
||||
|
||||
[aurore_testing_vm]
|
||||
pendragon.adm.auro.re
|
||||
|
||||
###############################################################################
|
||||
# OVH
|
||||
|
||||
[ovh_server]
|
||||
|
||||
[ovh_pve]
|
||||
horus.adm.auro.re
|
||||
|
||||
[ovh_container]
|
||||
synapse.adm.auro.re
|
||||
phabricator.adm.auro.re
|
||||
wiki.adm.auro.re
|
||||
wikijs.adm.auro.re
|
||||
www.adm.auro.re
|
||||
proxy-ovh.adm.auro.re
|
||||
matrix-services.adm.auro.re
|
||||
|
||||
[ovh_vm]
|
||||
serge.adm.auro.re
|
||||
|
@ -192,6 +193,8 @@ fl-4-2.borne.auro.re
|
|||
###############################################################################
|
||||
# Pacaterie
|
||||
|
||||
[pacaterie_server]
|
||||
|
||||
[pacaterie_pve]
|
||||
mordred.adm.auro.re
|
||||
titan.adm.auro.re
|
||||
|
@ -270,6 +273,7 @@ ee-2-1.borne.auro.re
|
|||
ee-2-2.borne.auro.re
|
||||
eo-0-1.borne.auro.re
|
||||
eo-2-1.borne.auro.re
|
||||
eo-3-1.borne.auro.re
|
||||
ep-0-1.borne.auro.re
|
||||
ep-1-1.borne.auro.re
|
||||
ep-1-2.borne.auro.re
|
||||
|
@ -279,6 +283,8 @@ ep-1-3.borne.auro.re
|
|||
###############################################################################
|
||||
# George Sand
|
||||
|
||||
[gs_server]
|
||||
|
||||
[gs_pve]
|
||||
lancelot.adm.auro.re
|
||||
odin.adm.auro.re
|
||||
|
@ -323,7 +329,6 @@ gd-1-2.borne.auro.re
|
|||
gd-2-1.borne.auro.re
|
||||
gd-3-1.borne.auro.re
|
||||
gd-4-1.borne.auro.re
|
||||
gd-4-2.borne.auro.re
|
||||
gd-5-1.borne.auro.re
|
||||
gd-5-2.borne.auro.re
|
||||
gd-garage-1.borne.auro.re
|
||||
|
@ -340,7 +345,6 @@ gf-0-1.borne.auro.re
|
|||
gf-1-1.borne.auro.re
|
||||
gf-2-1.borne.auro.re
|
||||
gf-3-1.borne.auro.re
|
||||
gf-3-2.borne.auro.re
|
||||
gf-4-1.borne.auro.re
|
||||
gf-5-1.borne.auro.re
|
||||
gg-5-1.borne.auro.re
|
||||
|
@ -349,6 +353,9 @@ gh-1-2.borne.auro.re
|
|||
|
||||
###############################################################################
|
||||
# Les Rives
|
||||
|
||||
[rives_server]
|
||||
|
||||
[rives_pve]
|
||||
thor.adm.auro.re
|
||||
loki.adm.auro.re
|
||||
|
@ -367,6 +374,8 @@ radius-rives.adm.auro.re
|
|||
routeur-rives.adm.auro.re
|
||||
|
||||
[rives_unifi]
|
||||
r1-0-1.borne.auro.re
|
||||
r1-0-2.borne.auro.re
|
||||
r1-1-1.borne.auro.re
|
||||
r1-1-2.borne.auro.re
|
||||
r1-1-3.borne.auro.re
|
||||
|
@ -383,6 +392,7 @@ r1-3-3.borne.auro.re
|
|||
r1-3-4.borne.auro.re
|
||||
r1-3-5.borne.auro.re
|
||||
r1-3-6.borne.auro.re
|
||||
r2-0-1.borne.auro.re
|
||||
r2-1-1.borne.auro.re
|
||||
r2-1-2.borne.auro.re
|
||||
r2-1-3.borne.auro.re
|
||||
|
@ -430,11 +440,14 @@ r3-4-8.borne.auro.re
|
|||
|
||||
# -aurore services
|
||||
[aurore:children]
|
||||
aurore_server
|
||||
aurore_pve
|
||||
aurore_vm
|
||||
|
||||
|
||||
# everything at ovh
|
||||
[ovh:children]
|
||||
ovh_server
|
||||
ovh_pve
|
||||
ovh_container
|
||||
ovh_vm
|
||||
|
@ -448,6 +461,7 @@ fleming_unifi
|
|||
|
||||
# everything at pacaterie
|
||||
[pacaterie:children]
|
||||
pacaterie_server
|
||||
pacaterie_pve
|
||||
pacaterie_vm
|
||||
pacaterie_unifi
|
||||
|
@ -461,12 +475,14 @@ edc_unifi
|
|||
|
||||
# everything at georgesand
|
||||
[gs:children]
|
||||
gs_server
|
||||
gs_pve
|
||||
gs_vm
|
||||
gs_unifi
|
||||
|
||||
# everything at Les Rives
|
||||
[rives:children]
|
||||
rives_server
|
||||
rives_pve
|
||||
rives_vm
|
||||
rives_unifi
|
||||
|
@ -489,6 +505,11 @@ rives_vm
|
|||
|
||||
# every server
|
||||
[server:children]
|
||||
ovh_server
|
||||
aurore_server
|
||||
pacaterie_server
|
||||
gs_server
|
||||
rives_server
|
||||
fleming_server
|
||||
edc_server
|
||||
|
||||
|
|
|
@ -14,6 +14,9 @@
|
|||
{{ groups['fleming_pve'] + groups['fleming_vm'] | list | sort }}
|
||||
prometheus_unifi_snmp_targets:
|
||||
- targets: "{{ groups['fleming_unifi'] | list | sort }}"
|
||||
prometheus_ipmi_targets:
|
||||
- targets: |
|
||||
{{ groups['fleming_pve'] + groups['fleming_server'] | list | sort }}
|
||||
roles:
|
||||
- prometheus
|
||||
|
||||
|
@ -34,6 +37,9 @@
|
|||
prometheus_ups_snmp_targets:
|
||||
- ups-pn-1.ups.auro.re
|
||||
- ups-ps-1.ups.auro.re
|
||||
prometheus_ipmi_targets:
|
||||
- targets: |
|
||||
{{ groups['pacaterie_pve'] + groups['pacaterie_server'] | list | sort }}
|
||||
roles:
|
||||
- prometheus
|
||||
|
||||
|
@ -54,6 +60,9 @@
|
|||
prometheus_targets:
|
||||
- targets: |
|
||||
{{ groups['edc_pve'] + groups['edc_vm'] + groups['edc_server'] | list | sort }}
|
||||
prometheus_ipmi_targets:
|
||||
- targets: |
|
||||
{{ groups['edc_pve'] + groups['edc_server'] | list | sort }}
|
||||
prometheus_unifi_snmp_targets:
|
||||
- targets: "{{ groups['edc_unifi'] | list | sort }}"
|
||||
roles:
|
||||
|
@ -77,6 +86,9 @@
|
|||
- ups-gk-1.ups.auro.re
|
||||
prometheus_pdu_snmp_targets:
|
||||
- pdu-ga-1.ups.auro.re
|
||||
prometheus_ipmi_targets:
|
||||
- targets: |
|
||||
{{ groups['gs_pve'] + groups['gs_server'] | list | sort }}
|
||||
roles:
|
||||
- prometheus
|
||||
|
||||
|
@ -98,6 +110,9 @@
|
|||
{{ groups['rives_pve'] + groups['rives_vm'] | list | sort }}
|
||||
prometheus_unifi_snmp_targets:
|
||||
- targets: "{{ groups['rives_unifi'] | list | sort }}"
|
||||
prometheus_ipmi_targets:
|
||||
- targets: |
|
||||
{{ groups['rives_pve'] + groups['rives_server'] | list | sort }}
|
||||
roles:
|
||||
- prometheus
|
||||
|
||||
|
@ -113,6 +128,9 @@
|
|||
prometheus_targets:
|
||||
- targets: |
|
||||
{{ groups['aurore_pve'] + groups['aurore_vm'] | list | sort }}
|
||||
prometheus_ipmi_targets:
|
||||
- targets: |
|
||||
{{ groups['aurore_pve'] + groups['aurore_server'] | list | sort }}
|
||||
prometheus_postgres_targets:
|
||||
- targets: |
|
||||
{{ groups['bdd'] + groups['radius'] | list | sort }}
|
||||
|
@ -146,7 +164,7 @@
|
|||
# Prometheus targets.json
|
||||
prometheus_targets:
|
||||
- targets: |
|
||||
{{ groups['ovh_pve'] + groups['ovh_vm'] | list | sort }}
|
||||
{{ groups['ovh_pve'] + groups['ovh_vm'] + groups['ovh_container'] | list | sort }}
|
||||
prometheus_postgres_targets:
|
||||
- targets:
|
||||
- bdd-ovh.adm.auro.re
|
||||
|
@ -180,8 +198,13 @@
|
|||
roles:
|
||||
- prometheus_postgres
|
||||
|
||||
# IPMI Exporters
|
||||
- hosts: server,pve
|
||||
roles:
|
||||
- prometheus_ipmi
|
||||
|
||||
|
||||
# Monitor all hosts
|
||||
- hosts: all,!edc_unifi,!fleming_unifi,!pacaterie_unifi,!gs_unifi,!rives_unifi,!aurore_testing_vm,!ovh_container
|
||||
- hosts: all,!edc_unifi,!fleming_unifi,!pacaterie_unifi,!gs_unifi,!rives_unifi,!aurore_testing_vm
|
||||
roles:
|
||||
- prometheus_node
|
||||
|
|
|
@ -113,6 +113,13 @@
|
|||
mode: 0644
|
||||
when: prometheus_pdu_snmp_targets is defined
|
||||
|
||||
- name: Configure Prometheus ipmi targets monitoring
|
||||
copy:
|
||||
content: "{{ prometheus_ipmi_targets | to_nice_json }}\n"
|
||||
dest: /etc/prometheus/targets_ipmi.json
|
||||
mode: 0644
|
||||
when: prometheus_ipmi_targets is defined
|
||||
|
||||
- name: Activate prometheus service
|
||||
systemd:
|
||||
name: prometheus
|
||||
|
|
|
@ -112,7 +112,7 @@ groups:
|
|||
rate(pg_stat_database_xact_rollback{datname!~"template.*"}[3m]) /
|
||||
rate(pg_stat_database_xact_commit{datname!~"template.*"}[3m])
|
||||
) * 100
|
||||
> 7
|
||||
> 20
|
||||
for: 0m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
|
@ -140,4 +140,22 @@ scrape_configs:
|
|||
- target_label: __address__
|
||||
replacement: 127.0.0.1:9116
|
||||
|
||||
- job_name: ipmi
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_ipmi.json'
|
||||
metrics_path: /metrics
|
||||
params:
|
||||
module: [default]
|
||||
relabel_configs:
|
||||
# Do not put :9290 in instance name, rather here
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- source_labels: [__param_target]
|
||||
target_label: __address__
|
||||
replacement: '$1:9290'
|
||||
|
||||
|
||||
...
|
||||
|
|
|
@ -11,7 +11,7 @@ groups:
|
|||
rules:
|
||||
|
||||
- alert: InstanceDown
|
||||
expr: up{instance!~".*.borne.auro.re$"} == 0
|
||||
expr: up{instance!~".*.borne.auro.re$", job="servers"} == 0
|
||||
for: 3m
|
||||
labels:
|
||||
severity: critical
|
||||
|
@ -50,7 +50,7 @@ groups:
|
|||
node_memory_SwapFree_bytes
|
||||
/ node_memory_SwapTotal_bytes
|
||||
)
|
||||
) * 100 >= 20
|
||||
) * 100 >= 50
|
||||
for: 3m
|
||||
labels:
|
||||
severity: warning
|
||||
|
|
|
@ -39,4 +39,5 @@ scrape_configs:
|
|||
- '{job="apc_pdu_snmp"}'
|
||||
- '{job="docker"}'
|
||||
- '{job="switch_snmp"}'
|
||||
- '{job="ipmi"}'
|
||||
...
|
||||
|
|
3
roles/prometheus_ipmi/files/ipmi_conf/ipmi.sh
Executable file
3
roles/prometheus_ipmi/files/ipmi_conf/ipmi.sh
Executable file
|
@ -0,0 +1,3 @@
|
|||
#!/bin/sh
# Generic sudo wrapper for the FreeIPMI tools.
#
# This script is meant to be invoked through a symlink named after the tool
# to run (e.g. ipmi-sensors): it takes the name it was called as and runs the
# binary of the same name from /usr/sbin via sudo, forwarding all arguments.
#
# Both $0 and the command substitution are quoted so an unusual invocation
# path cannot be word-split; exec avoids leaving an extra shell process
# between the caller and sudo.
exec sudo "/usr/sbin/$(basename "$0")" "$@"
|
||||
|
9
roles/prometheus_ipmi/files/prometheus
Normal file
9
roles/prometheus_ipmi/files/prometheus
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Allow the prometheus user to run the FreeIPMI tools below through sudo without a password (used by the IPMI collector)
|
||||
|
||||
prometheus ALL = NOPASSWD: /usr/sbin/ipmimonitoring,\
|
||||
/usr/sbin/ipmi-sensors,\
|
||||
/usr/sbin/ipmi-dcmi,\
|
||||
/usr/sbin/ipmi-raw,\
|
||||
/usr/sbin/bmc-info,\
|
||||
/usr/sbin/ipmi-chassis,\
|
||||
/usr/sbin/ipmi-sel
|
6
roles/prometheus_ipmi/handlers/main.yml
Normal file
6
roles/prometheus_ipmi/handlers/main.yml
Normal file
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
# Handler: reload systemd unit definitions and restart the exporter whenever a
# task notifies "Restart prometheus-ipmi-exporter".
# `daemon_reload` is an option of the systemd module, so that module is used
# directly instead of relying on the generic `service` proxy to pass it on.
- name: Restart prometheus-ipmi-exporter
  systemd:
    name: prometheus-ipmi-exporter
    state: restarted
    daemon_reload: true
|
65
roles/prometheus_ipmi/tasks/main.yml
Normal file
65
roles/prometheus_ipmi/tasks/main.yml
Normal file
|
@ -0,0 +1,65 @@
|
|||
---
|
||||
# Install the exporter package with apt after refreshing the package index.
# The result is registered so the task can be retried (3 retries) until apt
# reports success.
- name: Install Prometheus ipmi-exporter
  apt:
    name: prometheus-ipmi-exporter
    update_cache: true
  register: apt_result
  until: apt_result is succeeded
  retries: 3
|
||||
|
||||
# Rewrite the ARGS= line of the exporter's defaults file so the service is
# started with the ipmi_local.yml config and with --freeipmi.path pointing at
# /etc/prometheus/ipmi_conf/. The service is restarted when the line changes.
- name: Make Prometheus ipmi-exporter scrap on localhost only
  lineinfile:
    path: /etc/default/prometheus-ipmi-exporter
    regexp: '^ARGS='
    # `>-` yields the single line below without the trailing newline that a
    # `|` block scalar would append to the inserted line.
    line: >-
      ARGS="--config.file=/etc/prometheus/ipmi_local.yml --freeipmi.path='/etc/prometheus/ipmi_conf/'"
  notify: Restart prometheus-ipmi-exporter
|
||||
|
||||
# Install the sudoers drop-in that lets the prometheus user run the FreeIPMI
# tools through sudo. The file is root-owned and read-only for user and group.
- name: Define prometheus to be sudo for some command
  copy:
    src: files/prometheus
    dest: /etc/sudoers.d/prometheus
    owner: root
    group: root
    mode: u=r,g=r,o=
    # Syntax-check the file before it is moved into place: a malformed file
    # in /etc/sudoers.d would otherwise break sudo on the host.
    validate: /usr/sbin/visudo -cf %s
  notify: Restart prometheus-ipmi-exporter
|
||||
|
||||
# Copy the role's ipmi_conf directory into /etc/prometheus. Its content is
# owned by prometheus:prometheus, readable/executable by the owner, readable
# by the group and inaccessible to others. A change triggers a restart.
- name: Override commands launchs by prometheus for ipmi
  copy:
    dest: /etc/prometheus
    src: files/ipmi_conf
    group: prometheus
    owner: prometheus
    mode: "u=rx,g=r,o="
  notify:
    - Restart prometheus-ipmi-exporter
|
||||
|
||||
# Create one symlink per FreeIPMI tool name, each pointing at ipmi.sh in the
# same directory, so that the exporter finds every tool under
# /etc/prometheus/ipmi_conf/ and ends up running the wrapper script.
- name: Symlink FreeIPMI command names to the ipmi.sh wrapper
  file:
    src: /etc/prometheus/ipmi_conf/ipmi.sh
    dest: '/etc/prometheus/ipmi_conf/{{ item }}'
    state: link
    force: true
    # Apply ownership to the link itself rather than to its target.
    # No `mode` is set here: when links are followed, a mode given on this
    # task is applied to ipmi.sh, where u=rx,g=,o= conflicts with the
    # u=rx,g=r,o= set when the directory is copied, so both tasks would
    # report a change (and trigger a restart) on every run.
    follow: false
    owner: prometheus
    group: prometheus
  loop:
    - bmc-info
    - ipmi-chassis
    - ipmi-dcmi
    - ipmimonitoring
    - ipmi-raw
    - ipmi-sensors
    - ipmi-sel
  notify: Restart prometheus-ipmi-exporter
|
||||
|
||||
# Make sure the exporter unit is running now and is enabled at boot.
- name: Activate prometheus-ipmi-exporter service
  systemd:
    name: prometheus-ipmi-exporter
    state: started
    enabled: true
|
||||
|
||||
# Run any pending handlers now, so the "Restart prometheus-ipmi-exporter"
# handler fires at the end of this role only when a task above actually
# changed something. An unconditional restart task here would bounce the
# service on every run and make the handler redundant.
- name: Apply pending prometheus-ipmi-exporter restart
  meta: flush_handlers
|
Loading…
Reference in a new issue