add blackbox alerts

This commit is contained in:
histausse 2021-10-10 01:33:08 +02:00
parent 5f69245b17
commit b301ce5eaf
Signed by: histausse
GPG key ID: 67486F107F62E9E9
6 changed files with 66 additions and 5 deletions

View file

@ -0,0 +1,47 @@
---
# Prometheus alerting rules driven by blackbox-exporter probe metrics.
#
# NOTE(review): every rule copies `{{ $value }}` into a *label*. For the
# certificate rules the value (seconds until expiry) differs at each
# evaluation, so the alert's label set differs too. Consider moving it to an
# annotation after confirming nothing downstream reads the `value` label.
groups:
  - name: BlackBoxAllInstances
    rules:
      # Fires immediately (no `for:`) when a probe from the
      # "blackbox http-down" job succeeds.
      # Presumably that job lists targets that are expected to be
      # unreachable -- confirm against the scrape config.
      - alert: SiteUp
        expr: probe_success{job="blackbox http-down"} == 1
        annotations:
          title: '{{ $labels.instance }} is UP!'
          description: '{{ $labels.instance }} is now up!'
        labels:
          value: "{{ $value }}"
          severity: 'critical'

      # Fires when a probe from the "blackbox http-up" job has been failing
      # continuously for 5 minutes.
      - alert: SiteDown
        expr: probe_success{job="blackbox http-up"} == 0
        for: 5m
        annotations:
          title: '{{ $labels.instance }} is Down'
          description: >-
            {{ $labels.instance }} has been down for more than 5 minutes.
        labels:
          value: "{{ $value }}"
          severity: 'warning'

      # Fires when the earliest certificate expiry is less than
      # 2592000 s (30 days) away.
      # This rule keeps firing below the 10-day threshold as well, alongside
      # CertExpLess10days.
      - alert: CertExpLess30days
        expr: (probe_ssl_earliest_cert_expiry{job="blackbox internal tls"}-time()) < 2592000
        annotations:
          title: '{{ $labels.cname }} will expire soon'
          description: >-
            The certificate {{ $labels.cname }} on {{ $labels.instance }} will expire in
            {{ $value | humanizeDuration }}, it's time to renew it.
        labels:
          value: "{{ $value }}"
          severity: 'warning'

      # Fires when the earliest certificate expiry is less than
      # 864000 s (10 days) away.
      - alert: CertExpLess10days
        expr: (probe_ssl_earliest_cert_expiry{job="blackbox internal tls"}-time()) < 864000
        annotations:
          title: '{{ $labels.cname }} expiry is imminent!'
          description: >-
            The certificate {{ $labels.cname }} on {{ $labels.instance }} will expire in
            {{ $value | humanizeDuration }}!
        labels:
          value: "{{ $value }}"
          severity: 'critical'
...

View file

@ -3,3 +3,8 @@
systemd:
name: prometheus-blackbox-exporter.service
state: restarted
# Handler: restarts the `prometheus` systemd unit. Tasks refer to it by its
# exact name through `notify: Restart prometheus`.
- name: Restart prometheus
  systemd:
    state: restarted
    name: prometheus

View file

@ -85,3 +85,12 @@
dest: "/etc/nginx/sites-enabled/internal-blackbox"
state: link
force: yes
# Copies the blackbox alert rules file to /etc/prometheus, owned by
# prometheus:prometheus and readable by everyone, and notifies the
# "Restart prometheus" handler.
# NOTE(review): the destination file name has no hyphen, unlike the source
# (alertsblackbox.yml vs alerts-blackbox.yml) -- confirm it matches the
# rule_files entry in the Prometheus configuration before renaming anything.
- name: Add alert rules for blackbox on the prometheus server
  copy:
    src: alerts-blackbox.yml
    dest: /etc/prometheus/alertsblackbox.yml
    owner: prometheus
    group: prometheus
    mode: "u=rw,g=r,o=r"
  notify: Restart prometheus

View file

@ -1,6 +1,6 @@
---
groups:
- name: AllInstances
- name: NodeAllInstances
rules:
- alert: InstanceDown

View file

@ -71,9 +71,9 @@
force: no
notify: Restart prometheus
loop:
- blackbox-http-external-down
- blackbox-http-external-up
- blackbox-https-internal
- blackbox-http-down
- blackbox-http-up
- blackbox-tls-internal
- node
- name: Copy the web-config folder

View file

@ -64,7 +64,7 @@ scrape_configs:
cert_file: '/etc/prometheus/prometheus-{{ lan_address }}.crt'
key_file: '/etc/prometheus/prometheus-{{ lan_address }}.key'
{% for target_type in ('http-external-up', 'http-external-down') %}
{% for target_type in ('http-up', 'http-down') %}
- job_name: blackbox {{ target_type }}
metrics_path: /probe
params: