add first config for alertmanager
This commit is contained in:
parent
6e28abc1e7
commit
dd550700d0
7 changed files with 124 additions and 3 deletions
|
@ -3,6 +3,7 @@
|
|||
# Monitoring stack for the hosts in the `prometheus_servers` group.
# Roles are applied in the order listed.
- hosts: prometheus_servers
  roles:
    - prometheus
    - prometheus-alert-manager
    - grafana
|
||||
|
||||
- hosts: all, !tests,
|
||||
|
|
5
roles/prometheus-alert-manager/handlers/main.yml
Normal file
5
roles/prometheus-alert-manager/handlers/main.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
# Handlers for the prometheus-alert-manager role.

# Restarts the Alertmanager systemd unit. Tasks trigger it with
# `notify: Restart Alertmanager`.
- name: Restart Alertmanager
  systemd:
    state: restarted
    name: prometheus-alertmanager.service
|
41
roles/prometheus-alert-manager/tasks/main.yml
Normal file
41
roles/prometheus-alert-manager/tasks/main.yml
Normal file
|
@ -0,0 +1,41 @@
|
|||
---
# Installs the newest available prometheus-alertmanager apt package after
# refreshing the package cache. The result is registered so the task can be
# retried (up to `retries` times) until apt reports success.
- name: Install Prometheus Alert Manager
  apt:
    update_cache: true
    state: latest
    name:
      - prometheus-alertmanager
  register: apt_result
  until: apt_result is succeeded
  retries: 3
|
||||
|
||||
# Renders the role's `prometheus-alertmanager` template to the service's
# defaults file and notifies the restart handler when the file changes.
# Each entry of `args` is written out by the template as `--<name>=<value>`.
# NOTE(review): `args` is also an Ansible task keyword, so using it as a
# variable name may produce a reserved-name warning — confirm, and rename it
# here and in the template together if so.
- name: Setup the arguments for alertmanager
  template:
    dest: /etc/default/prometheus-alertmanager
    src: prometheus-alertmanager
    mode: "0644"
    owner: root
    group: root
  vars:
    args:
      # Listen on loopback only.
      - name: web.listen-address
        value: '127.0.0.1:9093'
  notify: Restart Alertmanager
|
||||
|
||||
#- name: Copy the CA cert
|
||||
# copy:
|
||||
# content: "{{ ca_cert }}"
|
||||
# dest: /etc/?/ca.crt
|
||||
# notify: Restart Alertmanager
|
||||
#
|
||||
#- name: Generate certificate
|
||||
# include_role:
|
||||
# name: generate-cert
|
||||
# vars:
|
||||
# directory: /etc/?/
|
||||
# cname: "alertmanager-{{ lan_address }}"
|
||||
# owner: ?
|
||||
# group: ?
|
||||
# key_mode: u=rw,g=,o=
|
||||
# subject_alt_name: "IP:{{ lan_address }}"
|
||||
## TODO: need an equivalent to `notify` here (presumably so Alertmanager is restarted when the generated certificate changes)
|
|
@ -0,0 +1,75 @@
|
|||
{{ ansible_managed | comment }}

# Set the command-line arguments to pass to the server.
{# One `--name=value` flag is emitted per entry of `args`; when `args` is empty the variable is set to an empty string. #}
{% if args %}
ARGS="\
{% for option in args %}
--{{ option.name }}={{ option.value }} \
{% endfor %}
"
{% else %}
ARGS=""
{% endif %}
|
||||
|
||||
# The alert manager supports the following options:
|
||||
|
||||
# --config.file="/etc/prometheus/alertmanager.yml"
|
||||
# Alertmanager configuration file name.
|
||||
# --storage.path="/var/lib/prometheus/alertmanager/"
|
||||
# Base path for data storage.
|
||||
# --data.retention=120h
|
||||
# How long to keep data for.
|
||||
# --alerts.gc-interval=30m
|
||||
# Interval between alert GC.
|
||||
# --log.level=info
|
||||
# Only log messages with the given severity or above.
|
||||
# --web.external-url=WEB.EXTERNAL-URL
|
||||
# The URL under which Alertmanager is externally reachable (for example,
|
||||
# if Alertmanager is served via a reverse proxy). Used for generating
|
||||
# relative and absolute links back to Alertmanager itself. If the URL has
|
||||
# a path portion, it will be used to prefix all HTTP endpoints served by
|
||||
# Alertmanager. If omitted, relevant URL components will be derived
|
||||
# automatically.
|
||||
# --web.route-prefix=WEB.ROUTE-PREFIX
|
||||
# Prefix for the internal routes of web endpoints. Defaults to path of
|
||||
# --web.external-url.
|
||||
# --web.listen-address=":9093"
|
||||
# Address to listen on for the web interface and API.
|
||||
# --web.ui-path="/usr/share/prometheus/alertmanager/ui/"
|
||||
# Path to static UI directory.
|
||||
# --template.default="/usr/share/prometheus/alertmanager/default.tmpl"
|
||||
# Path to default notification template.
|
||||
# --cluster.listen-address="0.0.0.0:9094"
|
||||
# Listen address for cluster.
|
||||
# --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
|
||||
# Explicit address to advertise in cluster.
|
||||
# --cluster.peer=CLUSTER.PEER ...
|
||||
# Initial peers (may be repeated).
|
||||
# --cluster.peer-timeout=15s
|
||||
# Time to wait between peers to send notifications.
|
||||
# --cluster.gossip-interval=200ms
|
||||
# Interval between sending gossip messages. By lowering this value (more
|
||||
# frequent) gossip messages are propagated across the cluster more
|
||||
# quickly at the expense of increased bandwidth.
|
||||
# --cluster.pushpull-interval=1m0s
|
||||
# Interval for gossip state syncs. Setting this interval lower (more
|
||||
# frequent) will increase convergence speeds across larger clusters at
|
||||
# the expense of increased bandwidth usage.
|
||||
# --cluster.tcp-timeout=10s
# Timeout for establishing a stream connection
|
||||
# with a remote node for a full state sync, and for stream read and write
|
||||
# operations.
|
||||
# --cluster.probe-timeout=500ms
|
||||
# Timeout to wait for an ack from a probed node before assuming it is
|
||||
# unhealthy. This should be set to 99-percentile of RTT (round-trip time)
|
||||
# on your network.
|
||||
# --cluster.probe-interval=1s
|
||||
# Interval between random node probes. Setting this lower (more frequent)
|
||||
# will cause the cluster to detect failed nodes more quickly at the
|
||||
# expense of increased bandwidth usage.
|
||||
# --cluster.settle-timeout=1m0s
|
||||
# Maximum time to wait for cluster connections to settle before
|
||||
# evaluating notifications.
|
||||
# --cluster.reconnect-interval=10s
|
||||
# Interval between attempting to reconnect to lost peers.
|
||||
# --cluster.reconnect-timeout=6h0m0s
|
||||
# Length of time to attempt to reconnect to a lost peer.
|
|
@ -3,7 +3,6 @@
|
|||
apt:
|
||||
name:
|
||||
- prometheus
|
||||
- prometheus-alertmanager
|
||||
- prometheus-pushgateway
|
||||
state: latest
|
||||
update_cache: true
|
||||
|
|
|
@ -8,6 +8,6 @@ server {
|
|||
ssl_verify_client on;
|
||||
|
||||
location / {
|
||||
proxy_pass 127.0.0.1:9090;
|
||||
proxy_pass http://127.0.0.1:9090;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ global:
|
|||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets: ['localhost:9093']
|
||||
- targets: ['{{ lan_address }}:9093']
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
|
|
Loading…
Reference in a new issue