add first config for alertmanager
This commit is contained in:
parent
6e28abc1e7
commit
dd550700d0
7 changed files with 124 additions and 3 deletions
|
@ -3,6 +3,7 @@
|
||||||
- hosts: prometheus_servers
|
- hosts: prometheus_servers
|
||||||
roles:
|
roles:
|
||||||
- prometheus
|
- prometheus
|
||||||
|
- prometheus-alert-manager
|
||||||
- grafana
|
- grafana
|
||||||
|
|
||||||
- hosts: all, !tests,
|
- hosts: all, !tests,
|
||||||
|
|
5
roles/prometheus-alert-manager/handlers/main.yml
Normal file
5
roles/prometheus-alert-manager/handlers/main.yml
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
- name: Restart Alertmanager
|
||||||
|
systemd:
|
||||||
|
name: prometheus-alertmanager.service
|
||||||
|
state: restarted
|
41
roles/prometheus-alert-manager/tasks/main.yml
Normal file
41
roles/prometheus-alert-manager/tasks/main.yml
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
---
|
||||||
|
- name: Install Prometheus Alert Manager
|
||||||
|
apt:
|
||||||
|
name:
|
||||||
|
- prometheus-alertmanager
|
||||||
|
state: latest
|
||||||
|
update_cache: true
|
||||||
|
register: apt_result
|
||||||
|
retries: 3
|
||||||
|
until: apt_result is succeeded
|
||||||
|
|
||||||
|
- name: Setup the arguments for alertmanager
|
||||||
|
template:
|
||||||
|
src: prometheus-alertmanager
|
||||||
|
dest: /etc/default/prometheus-alertmanager
|
||||||
|
owner: root
|
||||||
|
group: root
|
||||||
|
mode: '0644'
|
||||||
|
notify: Restart Alertmanager
|
||||||
|
vars:
|
||||||
|
args:
|
||||||
|
- name: web.listen-address
|
||||||
|
value: "127.0.0.1:9093"
|
||||||
|
|
||||||
|
#- name: Copy the CA cert
|
||||||
|
# copy:
|
||||||
|
# content: "{{ ca_cert }}"
|
||||||
|
# dest: /etc/?/ca.crt
|
||||||
|
# notify: Restart Alertmanager
|
||||||
|
#
|
||||||
|
#- name: Generate certificate
|
||||||
|
# include_role:
|
||||||
|
# name: generate-cert
|
||||||
|
# vars:
|
||||||
|
# directory: /etc/?/
|
||||||
|
# cname: "alertmanager-{{ lan_address }}"
|
||||||
|
# owner: ?
|
||||||
|
# group: ?
|
||||||
|
# key_mode: u=rw,g=,o=
|
||||||
|
# subject_alt_name: "IP:{{ lan_address }}"
|
||||||
|
## Need an equivalent to notify here
|
|
@ -0,0 +1,75 @@
|
||||||
|
{{ ansible_managed | comment }}
|
||||||
|
|
||||||
|
# Set the command-line arguments to pass to the server.
|
||||||
|
{% if not args %}
|
||||||
|
ARGS=""
|
||||||
|
{% else %}
|
||||||
|
ARGS="\
|
||||||
|
{% for arg in args %}
|
||||||
|
--{{ arg.name }}={{ arg.value }} \
|
||||||
|
{% endfor %}
|
||||||
|
"
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
# The alert manager supports the following options:
|
||||||
|
|
||||||
|
# --config.file="/etc/prometheus/alertmanager.yml"
|
||||||
|
# Alertmanager configuration file name.
|
||||||
|
# --storage.path="/var/lib/prometheus/alertmanager/"
|
||||||
|
# Base path for data storage.
|
||||||
|
# --data.retention=120h
|
||||||
|
# How long to keep data for.
|
||||||
|
# --alerts.gc-interval=30m
|
||||||
|
# Interval between alert GC.
|
||||||
|
# --log.level=info
|
||||||
|
# Only log messages with the given severity or above.
|
||||||
|
# --web.external-url=WEB.EXTERNAL-URL
|
||||||
|
# The URL under which Alertmanager is externally reachable (for example,
|
||||||
|
# if Alertmanager is served via a reverse proxy). Used for generating
|
||||||
|
# relative and absolute links back to Alertmanager itself. If the URL has
|
||||||
|
# a path portion, it will be used to prefix all HTTP endpoints served by
|
||||||
|
# Alertmanager. If omitted, relevant URL components will be derived
|
||||||
|
# automatically.
|
||||||
|
# --web.route-prefix=WEB.ROUTE-PREFIX
|
||||||
|
# Prefix for the internal routes of web endpoints. Defaults to path of
|
||||||
|
# --web.external-url.
|
||||||
|
# --web.listen-address=":9093"
|
||||||
|
# Address to listen on for the web interface and API.
|
||||||
|
# --web.ui-path="/usr/share/prometheus/alertmanager/ui/"
|
||||||
|
# Path to static UI directory.
|
||||||
|
# --template.default="/usr/share/prometheus/alertmanager/default.tmpl"
|
||||||
|
# Path to default notification template.
|
||||||
|
# --cluster.listen-address="0.0.0.0:9094"
|
||||||
|
# Listen address for cluster.
|
||||||
|
# --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
|
||||||
|
# Explicit address to advertise in cluster.
|
||||||
|
# --cluster.peer=CLUSTER.PEER ...
|
||||||
|
# Initial peers (may be repeated).
|
||||||
|
# --cluster.peer-timeout=15s
|
||||||
|
# Time to wait between peers to send notifications.
|
||||||
|
# --cluster.gossip-interval=200ms
|
||||||
|
# Interval between sending gossip messages. By lowering this value (more
|
||||||
|
# frequent) gossip messages are propagated across the cluster more
|
||||||
|
# quickly at the expense of increased bandwidth.
|
||||||
|
# --cluster.pushpull-interval=1m0s
|
||||||
|
# Interval for gossip state syncs. Setting this interval lower (more
|
||||||
|
# frequent) will increase convergence speeds across larger clusters at
|
||||||
|
# the expense of increased bandwidth usage.
|
||||||
|
# --cluster.tcp-timeout=10s Timeout for establishing a stream connection
|
||||||
|
# with a remote node for a full state sync, and for stream read and write
|
||||||
|
# operations.
|
||||||
|
# --cluster.probe-timeout=500ms
|
||||||
|
# Timeout to wait for an ack from a probed node before assuming it is
|
||||||
|
# unhealthy. This should be set to 99-percentile of RTT (round-trip time)
|
||||||
|
# on your network.
|
||||||
|
# --cluster.probe-interval=1s
|
||||||
|
# Interval between random node probes. Setting this lower (more frequent)
|
||||||
|
# will cause the cluster to detect failed nodes more quickly at the
|
||||||
|
# expense of increased bandwidth usage.
|
||||||
|
# --cluster.settle-timeout=1m0s
|
||||||
|
# Maximum time to wait for cluster connections to settle before
|
||||||
|
# evaluating notifications.
|
||||||
|
# --cluster.reconnect-interval=10s
|
||||||
|
# Interval between attempting to reconnect to lost peers.
|
||||||
|
# --cluster.reconnect-timeout=6h0m0s
|
||||||
|
# Length of time to attempt to reconnect to a lost peer.
|
|
@ -3,7 +3,6 @@
|
||||||
apt:
|
apt:
|
||||||
name:
|
name:
|
||||||
- prometheus
|
- prometheus
|
||||||
- prometheus-alertmanager
|
|
||||||
- prometheus-pushgateway
|
- prometheus-pushgateway
|
||||||
state: latest
|
state: latest
|
||||||
update_cache: true
|
update_cache: true
|
||||||
|
|
|
@ -8,6 +8,6 @@ server {
|
||||||
ssl_verify_client on;
|
ssl_verify_client on;
|
||||||
|
|
||||||
location / {
|
location / {
|
||||||
proxy_pass 127.0.0.1:9090;
|
proxy_pass http://127.0.0.1:9090;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,7 @@ global:
|
||||||
alerting:
|
alerting:
|
||||||
alertmanagers:
|
alertmanagers:
|
||||||
- static_configs:
|
- static_configs:
|
||||||
- targets: ['localhost:9093']
|
- targets: ['{{ lan_address }}:9093']
|
||||||
|
|
||||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||||
rule_files:
|
rule_files:
|
||||||
|
|
Loading…
Reference in a new issue