add blackbox alerts
This commit is contained in:
parent
5f69245b17
commit
b301ce5eaf
6 changed files with 66 additions and 5 deletions
47
roles/prometheus-blackbox-exporter/files/alerts-blackbox.yml
Normal file
47
roles/prometheus-blackbox-exporter/files/alerts-blackbox.yml
Normal file
|
@ -0,0 +1,47 @@
|
|||
---
# Prometheus alerting rules evaluated against blackbox-exporter probe metrics.
# Each rule selects one scrape job by its `job` label.
groups:
  - name: BlackBoxAllInstances
    rules:

      # Fires as soon as a probe in the "blackbox http-down" job succeeds.
      # NOTE(review): presumably these targets are expected to be unreachable,
      # so a successful probe is the abnormal state - confirm against the
      # scrape configuration.
      - alert: SiteUp
        expr: probe_success{job="blackbox http-down"} == 1
        annotations:
          title: '{{ $labels.instance }} is UP!'
          description: '{{ $labels.instance }} is now up!'
        labels:
          # The value is constant (1) while the alert is active, so keeping it
          # as a label does not change the alert's identity between
          # evaluations.
          value: "{{ $value }}"
          severity: 'critical'

      # Fires when a probe in the "blackbox http-up" job has been failing
      # continuously for 5 minutes.
      - alert: SiteDown
        expr: probe_success{job="blackbox http-up"} == 0
        for: 5m
        annotations:
          title: '{{ $labels.instance }} is Down'
          description: >-
            {{ $labels.instance }} has been down for more than 5 minutes.
        labels:
          # Constant (0) while the alert is active; see SiteUp.
          value: "{{ $value }}"
          severity: 'warning'

      # Fires when the earliest certificate expiry seen by the probe is less
      # than 2592000 seconds (30 days) away.
      - alert: CertExpLess30days
        expr: (probe_ssl_earliest_cert_expiry{job="blackbox internal tls"}-time()) < 2592000
        annotations:
          title: '{{ $labels.cname }} will expire soon'
          description: >-
            The certificate {{ $labels.cname }} on {{ $labels.instance }} will expire in
            {{ $value | humanizeDuration }}, it's time to renew it.
          # The remaining lifetime changes on every evaluation. As a label it
          # would give the alert a new identity each cycle (the previous alert
          # resolves and a new one fires), so it is exposed as an annotation.
          value: "{{ $value }}"
        labels:
          severity: 'warning'

      # Fires when the earliest certificate expiry seen by the probe is less
      # than 864000 seconds (10 days) away.
      - alert: CertExpLess10days
        expr: (probe_ssl_earliest_cert_expiry{job="blackbox internal tls"}-time()) < 864000
        annotations:
          title: '{{ $labels.cname }} expiry is imminent!'
          description: >-
            The certificate {{ $labels.cname }} on {{ $labels.instance }} will expire in
            {{ $value | humanizeDuration }}!
          # Kept out of the labels for the same reason as in CertExpLess30days:
          # the value differs at every evaluation.
          value: "{{ $value }}"
        labels:
          severity: 'critical'
...
|
|
@ -3,3 +3,8 @@
|
|||
systemd:
|
||||
name: prometheus-blackbox-exporter.service
|
||||
state: restarted
|
||||
|
||||
# Handler: restarts the "prometheus" systemd unit. It runs when a task
# notifies "Restart prometheus".
- name: Restart prometheus
  systemd:
    state: restarted
    name: prometheus
|
||||
|
|
|
@ -85,3 +85,12 @@
|
|||
dest: "/etc/nginx/sites-enabled/internal-blackbox"
|
||||
state: link
|
||||
force: yes
|
||||
|
||||
# Copy the role's alerts-blackbox.yml rule file to /etc/prometheus, owned by
# the prometheus user and group, and notify the "Restart prometheus" handler
# when the copy reports a change.
- name: Add alert rules for blackbox on the prometheus server
  copy:
    src: alerts-blackbox.yml
    dest: /etc/prometheus/alertsblackbox.yml
    owner: prometheus
    group: prometheus
    # Symbolic mode equivalent to 0644; quoted so it is always read as a string.
    mode: "u=rw,g=r,o=r"
  notify: Restart prometheus
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
groups:
|
||||
- name: AllInstances
|
||||
- name: NodeAllInstances
|
||||
rules:
|
||||
|
||||
- alert: InstanceDown
|
||||
|
|
|
@ -71,9 +71,9 @@
|
|||
force: no
|
||||
notify: Restart prometheus
|
||||
loop:
|
||||
- blackbox-http-external-down
|
||||
- blackbox-http-external-up
|
||||
- blackbox-https-internal
|
||||
- blackbox-http-down
|
||||
- blackbox-http-up
|
||||
- blackbox-tls-internal
|
||||
- node
|
||||
|
||||
- name: Copy the web-config folder
|
||||
|
|
|
@ -64,7 +64,7 @@ scrape_configs:
|
|||
cert_file: '/etc/prometheus/prometheus-{{ lan_address }}.crt'
|
||||
key_file: '/etc/prometheus/prometheus-{{ lan_address }}.key'
|
||||
|
||||
{% for target_type in ('http-external-up', 'http-external-down') %}
|
||||
{% for target_type in ('http-up', 'http-down') %}
|
||||
- job_name: blackbox {{ target_type }}
|
||||
metrics_path: /probe
|
||||
params:
|
||||
|
|
Loading…
Reference in a new issue