diff --git a/README.md b/README.md index e64776c..6b7d611 100644 --- a/README.md +++ b/README.md @@ -86,15 +86,11 @@ On va utiliser plutôt `ProxyJump`. Dans la configuration SSH : ``` -# Use a key to log on all Crans servers -# and use a proxy server +# Use a proxy jump server to log in to every host of the Aurore inventory Host 10.128.0.* *.adm.auro.re ProxyJump passerelle.auro.re ``` -Il faut savoir que depuis Ansible 2.5, des connexions persistantes sont créées -vers les serveurs puis détruites à la fin de l'exécution. - Il faut sa clé SSH configurée sur le serveur que l'on déploit. ```bash ssh-copy-id proxy.adm.auro.re @@ -102,6 +98,8 @@ ssh-copy-id proxy.adm.auro.re ### Lancer Ansible +Il faut que `python3-netaddr` soit installé sur sa machine. + Pour tester le playbook `base.yml` : ```bash ansible-playbook --ask-vault-pass base.yml --check @@ -112,3 +110,11 @@ Vous pouvez ensuite enlever `--check` si vous voulez appliquer les changements ! Si vous avez des soucis de fingerprint ECDSA, vous pouvez ignorer une première fois (dangereux !) : `ANSIBLE_HOST_KEY_CHECKING=0 ansible-playbook...`. 
+### Ajouter toutes les empreintes de serveur + +```bash +#!/bin/bash +for ip in `cat hosts|grep .adm.auro.re`; do + ssh-copy-id -i ~/.ssh/id_rsa.pub $ip +done +``` diff --git a/base.yml b/base.yml index bafc56b..191d80d 100644 --- a/base.yml +++ b/base.yml @@ -1,12 +1,12 @@ --- # Put a common configuration on all servers -- hosts: all +- hosts: all,!unifi roles: - baseconfig - basesecurity # Plug LDAP on all servers -- hosts: all +- hosts: all,!unifi roles: - ldap-client diff --git a/group_vars/all/vault.yml b/group_vars/all/vault.yml index 8f0765f..4d728f7 100644 --- a/group_vars/all/vault.yml +++ b/group_vars/all/vault.yml @@ -1,121 +1,124 @@ $ANSIBLE_VAULT;1.1;AES256 -63383763633231303965383737313737656433616465303432663764393065613739663934646462 -6433653264666237643634303032653438323363646433370a333663623139343464376432313337 +353533336133303463356663323736326332 diff --git a/hosts b/hosts index f1ac5fd..8718a1d 100644 --- a/hosts +++ b/hosts @@ -2,7 +2,6 @@ # How to name your server ? # > We name servers according to location, then type. -# > So all containers at OVH are in ovh-container. # > Then we regroup everything in global geographic and type groups. 
[aurore_pve] @@ -49,13 +48,14 @@ dhcp-fleming-backup.adm.auro.re dns-fleming.adm.auro.re dns-fleming-backup.adm.auro.re prometheus-fleming.adm.auro.re +#prometheus-fleming-fo.adm.auro.re radius-fleming.adm.auro.re radius-fleming-backup.adm.auro.re unifi-fleming.adm.auro.re routeur-fleming.adm.auro.re routeur-fleming-backup.adm.auro.re -[pacaterie_pve] +[pacaterie_physical] mordred.adm.auro.re titan.adm.auro.re @@ -67,6 +67,7 @@ dhcp-pacaterie-backup.adm.auro.re dns-pacaterie.adm.auro.re dns-pacaterie-backup.adm.auro.re prometheus-pacaterie.adm.auro.re +#prometheus-pacaterie-fo.adm.auro.re radius-pacaterie.adm.auro.re radius-pacaterie-backup.adm.auro.re unifi-pacaterie.adm.auro.re @@ -100,23 +101,21 @@ dhcp-gs.adm.auro.re # everything at ovh [ovh:children] -ovh_pve +ovh_physical ovh_container ovh_vm -# everything at ovh_testing -[ovh_testing:children] -ovh_testing_vm - # everything at fleming [fleming:children] -fleming_pve +fleming_physical fleming_vm +fleming_unifi # everything at pacaterie [pacaterie:children] -pacaterie_pve +pacaterie_physical pacaterie_vm +pacaterie_unifi # everything at edc [edc:children] diff --git a/monitoring.yml b/monitoring.yml index d962547..76e063b 100644 --- a/monitoring.yml +++ b/monitoring.yml @@ -1,30 +1,61 @@ --- -# Deploy Prometheus -- hosts: prometheus-fleming.adm.auro.re +- hosts: prometheus-fleming.adm.auro.re,prometheus-fleming-fo.adm.auro.re vars: + prometheus_alertmanager: docker-ovh.adm.auro.re:9093 + snmp_unifi_password: "{{ vault_snmp_unifi_password }}" + + # Prometheus targets.json + prometheus_targets: + - targets: | + {{ groups['fleming_physical'] + groups['fleming_vm'] | list | sort }} + prometheus_unifi_snmp_targets: + - targets: "{{ groups['fleming_unifi'] | list | sort }}" + roles: + - prometheus + +- hosts: prometheus-pacaterie.adm.auro.re,prometheus-pacaterie-fo.adm.auro.re + vars: + prometheus_alertmanager: docker-ovh.adm.auro.re:9093 + snmp_unifi_password: "{{ vault_snmp_unifi_password }}" + # Prometheus 
targets.json prometheus_targets: - - labels: {job: node} - targets: "{{ groups['fleming'] | map('replace', '.re', '.re:9100') | list | sort }}" - - labels: {job: prometheus} - targets: ['localhost:9090'] + - targets: | + {{ groups['pacaterie_physical'] + groups['pacaterie_vm'] | list | sort }} + prometheus_unifi_snmp_targets: + - targets: "{{ groups['pacaterie_unifi'] | list | sort }}" roles: - prometheus - - prometheus-alertmanager -- hosts: prometheus-pacaterie.adm.auro.re +- hosts: prometheus-edc.adm.auro.re,prometheus-edc-fo.adm.auro.re vars: + prometheus_alertmanager: docker-ovh.adm.auro.re:9093 + snmp_unifi_password: "{{ vault_snmp_unifi_password }}" + + # Prometheus targets.json + prometheus_targets: + - targets: | + {{ groups['edc_physical'] + groups['edc_vm'] | list | sort }} + prometheus_unifi_snmp_targets: + - targets: "{{ groups['edc_unifi'] | list | sort }}" + roles: + - prometheus + +- hosts: prometheus-georgesand.adm.auro.re,prometheus-georgesand-fo.adm.auro.re + vars: + prometheus_alertmanager: docker-ovh.adm.auro.re:9093 + snmp_unifi_password: "{{ vault_snmp_unifi_password }}" + # Prometheus targets.json prometheus_targets: - - labels: {job: node} - targets: "{{ groups['pacaterie'] | map('replace', '.re', '.re:9100') | list | sort }}" - - labels: {job: prometheus} - targets: ['localhost:9090'] + - targets: | + {{ groups['georgesand_physical'] + groups['georgesand_vm'] | list | sort }} + prometheus_unifi_snmp_targets: + - targets: "{{ groups['georgesand_unifi'] | list | sort }}" roles: - prometheus - - prometheus-alertmanager # Monitor all hosts -- hosts: fleming,pacaterie +- hosts: all,!unifi,!ovh roles: - prometheus-node diff --git a/proxmox.yml b/proxmox.yml index fb51466..98af1cc 100644 --- a/proxmox.yml +++ b/proxmox.yml @@ -163,5 +163,269 @@ prompt: "Enter LDAP password for your user" private: true - roles: - - proxmox-vm + tasks: + - name: Define a virtual machine in Proxmox + proxmox_kvm: + api_user: "{{ ansible_user_id }}@pam" + 
api_password: "{{ password }}" + api_host: "{{ item.virtu }}.adm.auro.re" + name: "{{ item.name }}" + node: "{{ item.virtu }}" + scsihw: virtio-scsi-pci + scsi: '{"scsi0":"{{ item.virtu }}:{{ item.disksize }},format=raw"}' + sata: '{"sata0":"local:iso/{{ item.installiso }},media=cdrom"}' + net: '{"net0":"virtio,bridge=vmbr2"}' # Adm only by default + cores: "{{ item.cores }}" + memory: "{{ item.memory }}" + balloon: "{{ item.memory // 2 }}" + bios: seabios # Ansible module doesn't support UEFI boot disk + loop: + # Réseau Fleming + - name: ldap-replica-fleming + virtu: freya + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dhcp-fleming + virtu: freya + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dns-fleming + virtu: freya + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: prometheus-fleming + virtu: freya + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: radius-fleming + virtu: freya + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: unifi-fleming + virtu: freya + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-9.9.0-amd64-netinst.iso + - name: routeur-fleming + virtu: freya + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + + - name: ldap-replica-fleming-fo + virtu: marki + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dhcp-fleming-fo + virtu: marki + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: 
dns-fleming-fo + virtu: marki + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: prometheus-fleming-fo + virtu: marki + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: radius-fleming-fo + virtu: marki + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: routeur-fleming-fo + virtu: marki + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + + # Réseau Pacaterie + - name: ldap-replica-pacaterie + virtu: mordred + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dhcp-pacaterie + virtu: mordred + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dns-pacaterie + virtu: mordred + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: prometheus-pacaterie + virtu: mordred + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: radius-pacaterie + virtu: mordred + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: unifi-pacaterie + virtu: mordred + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-9.9.0-amd64-netinst.iso + - name: routeur-pacaterie + virtu: mordred + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + + - name: ldap-replica-pacaterie-fo + virtu: titan + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: 
dhcp-pacaterie-fo + virtu: titan + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dns-pacaterie-fo + virtu: titan + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: prometheus-pacaterie-fo + virtu: titan + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: radius-pacaterie-fo + virtu: titan + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: routeur-pacaterie-fo + virtu: titan + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + + # Réseau EDC + - name: ldap-replica-edc + virtu: chapalux + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dhcp-edc + virtu: chapalux + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dns-edc + virtu: chapalux + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: prometheus-edc + virtu: chapalux + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: radius-edc + virtu: chapalux + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: unifi-edc + virtu: chapalux + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-9.9.0-amd64-netinst.iso + - name: routeur-edc + virtu: chapalux + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + + # Réseau George Sand + - name: ldap-replica-georgesand + 
virtu: perceval + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dhcp-georgesand + virtu: perceval + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: dns-georgesand + virtu: perceval + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: prometheus-georgesand + virtu: perceval + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: radius-georgesand + virtu: perceval + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso + - name: unifi-georgesand + virtu: perceval + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-9.9.0-amd64-netinst.iso + - name: routeur-georgesand + virtu: perceval + cores: 2 # 2 minimum, 10 maximum + memory: 1024 # M + disksize: 16 # G + installiso: debian-10.0.0-amd64-netinst.iso diff --git a/roles/baseconfig/tasks/main.yml b/roles/baseconfig/tasks/main.yml index 5249244..bf88ae2 100644 --- a/roles/baseconfig/tasks/main.yml +++ b/roles/baseconfig/tasks/main.yml @@ -23,6 +23,7 @@ - git # code versioning - less # i like cats - screen # Vulcain asked for this + - lsb-release update_cache: true register: apt_result retries: 3 diff --git a/roles/ldap-client/tasks/install_ldap.yml b/roles/ldap-client/tasks/install_ldap.yml index 5e8dae3..2f81e11 100644 --- a/roles/ldap-client/tasks/install_ldap.yml +++ b/roles/ldap-client/tasks/install_ldap.yml @@ -26,7 +26,7 @@ lineinfile: dest: /etc/nsswitch.conf regexp: "^{{ item }}:" - line: "{{ item }}: files ldap" + line: "{{ item }}: files ldap" loop: - passwd - group diff --git a/roles/prometheus-alertmanager/handlers/main.yml b/roles/prometheus-alertmanager/handlers/main.yml deleted file mode 100644 
index 3ddbf93..0000000 --- a/roles/prometheus-alertmanager/handlers/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -- name: Restart Prometheus Alertmanager - service: - name: prometheus-alertmanager - state: restarted diff --git a/roles/prometheus-alertmanager/tasks/main.yml b/roles/prometheus-alertmanager/tasks/main.yml deleted file mode 100644 index b65a295..0000000 --- a/roles/prometheus-alertmanager/tasks/main.yml +++ /dev/null @@ -1,14 +0,0 @@ ---- -- name: Install Prometheus Alertmanager - apt: - update_cache: true - name: prometheus-alertmanager - register: apt_result - retries: 3 - until: apt_result is succeeded - -- name: Configure Prometheus Alertmanager - template: - src: prometheus/alertmanager.yml.j2 - dest: /etc/prometheus/alertmanager.yml - notify: Restart Prometheus Alertmanager diff --git a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 deleted file mode 100644 index 85a31c0..0000000 --- a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 +++ /dev/null @@ -1,59 +0,0 @@ -# {{ ansible_managed }} - -global: - # The smarthost and SMTP sender used for mail notifications. - smtp_smarthost: 'proxy.auro.re:25' - smtp_from: 'prometheus@auro.re' - #smtp_auth_username: 'alertmanager' - #smtp_auth_password: 'password' - smtp_require_tls: false - -# The directory from which notification templates are read. -templates: -- '/etc/prometheus/alertmanager_templates/*.tmpl' - -# The root route on which each incoming alert enters. -route: - # The labels by which incoming alerts are grouped together. For example, - # multiple alerts coming in for cluster=A and alertname=LatencyHigh would - # be batched into a single group. - #group_by: ['alertname', 'cluster', 'service'] - group_by: [] # do not group for text chat - - # When a new group of alerts is created by an incoming alert, wait at - # least 'group_wait' to send the initial notification. 
- # This way ensures that you get multiple alerts for the same group that start - # firing shortly after another are batched together on the first - # notification. - group_wait: 1m - - # When the first notification was sent, wait 'group_interval' to send a batch - # of new alerts that started firing for that group. - group_interval: 1m - - # If an alert has successfully been sent, wait 'repeat_interval' to - # resend them. - repeat_interval: 12h - - # A default receiver - receiver: team-monitoring-mails - - -# Inhibition rules allow to mute a set of alerts given that another alert is -# firing. -# We use this to mute any warning-level notifications if the same alert is -# already critical. -inhibit_rules: -- source_match: - severity: 'critical' - target_match: - severity: 'warning' - # Apply inhibition if the alertname is the same. - equal: ['alertname', 'cluster', 'service'] - - -receivers: -- name: 'team-monitoring-mails' - email_configs: - - to: 'monitoring.aurore@lists.crans.org' - diff --git a/roles/prometheus-node/tasks/main.yml b/roles/prometheus-node/tasks/main.yml index 5d6f56f..7ca6350 100644 --- a/roles/prometheus-node/tasks/main.yml +++ b/roles/prometheus-node/tasks/main.yml @@ -3,9 +3,25 @@ apt: update_cache: true name: prometheus-node-exporter + install_recommends: false # Do not install smartmontools register: apt_result retries: 3 until: apt_result is succeeded + when: + - ansible_lsb.codename == 'buster' + +# Prometheus 2 node is in stretch-backports +- name: Install Prometheus node-exporter (stretch-backports) + apt: + update_cache: true + name: prometheus-node-exporter + install_recommends: false + default_release: stretch-backports + register: apt_result + retries: 3 + until: apt_result is succeeded + when: + - ansible_lsb.codename == 'stretch' - name: Activate prometheus-node-exporter service systemd: @@ -13,12 +29,11 @@ enabled: true state: started -# Doesn't work on Debian Stretch +# Doesn't work on Debian Stretch with the old prometheus 
package - name: Make Prometheus node-exporter listen on adm only - when: - - ansible_lsb.codename == 'buster' lineinfile: path: /etc/default/prometheus-node-exporter regexp: '^ARGS=' - line: "ARGS=\"--web.listen-address={{ ansible_hostname }}.adm.auro.re:9100\"" + line: | + ARGS="--web.listen-address={{ ansible_hostname }}.adm.auro.re:9100" notify: Restart prometheus-node-exporter diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml index 6e730af..62dde31 100644 --- a/roles/prometheus/tasks/main.yml +++ b/roles/prometheus/tasks/main.yml @@ -31,12 +31,27 @@ line: "ARGS=\"--web.listen-address=\"" notify: Restart prometheus-snmp-exporter +# This file stores the SNMP OIDs +- name: Configure Prometheus snmp-exporter + template: + src: "prometheus/snmp.yml.j2" + dest: "/etc/prometheus/snmp.yml" + mode: 0600 + owner: prometheus + notify: Restart prometheus-snmp-exporter + # We don't need to restart Prometheus when updating nodes - name: Configure Prometheus nodes copy: content: "{{ prometheus_targets | to_nice_json }}" dest: /etc/prometheus/targets.json +# We don't need to restart Prometheus when updating Unifi SNMP targets +- name: Configure Prometheus Ubiquity Unifi SNMP devices + copy: + content: "{{ prometheus_unifi_snmp_targets | to_nice_json }}" + dest: /etc/prometheus/targets_unifi_snmp.json + - name: Activate prometheus service systemd: name: prometheus diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2 index 0729cc7..2a10358 100644 --- a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 +++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2 @@ -17,7 +17,7 @@ groups: # Alert for out of memory - alert: OutOfMemory - expr: ((node_memory_MemFree_bytes or node_memory_MemFree) + (node_memory_Cached_bytes or node_memory_Cached) + (node_memory_Buffers_bytes or node_memory_Buffers)) / (node_memory_MemTotal_bytes or node_memory_MemTotal) * 100 < 10 + expr: 
(node_memory_MemFree_bytes + node_memory_Cached_bytes + node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 < 10 for: 5m labels: severity: warning @@ -26,7 +26,7 @@ groups: # Alert for out of disk space - alert: OutOfDiskSpace - expr: (node_filesystem_free_bytes{fstype="ext4"} or node_filesystem_free{fstype="ext4"}) / (node_filesystem_size_bytes{fstype="ext4"} or node_filesystem_size{fstype="ext4"}) * 100 < 10 + expr: node_filesystem_free_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"} * 100 < 10 for: 5m labels: severity: warning @@ -44,8 +44,8 @@ groups: # Alert for high CPU usage - alert: CpuUsage - expr: ((100 - avg by (instance) (irate(node_cpu{mode="idle"}[5m])) * 100) or (100 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)) > 75 - for: 5m + expr: (100 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 75 + for: 10m labels: severity: warning annotations: diff --git a/roles/prometheus/templates/prometheus/prometheus.yml.j2 b/roles/prometheus/templates/prometheus/prometheus.yml.j2 index 8b8fecb..31df6bd 100644 --- a/roles/prometheus/templates/prometheus/prometheus.yml.j2 +++ b/roles/prometheus/templates/prometheus/prometheus.yml.j2 @@ -11,22 +11,57 @@ global: monitor: 'example' # Alertmanager configuration +# Use the Prometheus Alertmanager at the address given by `prometheus_alertmanager` alerting: alertmanagers: - static_configs: - - targets: ['localhost:9093'] + - targets: ['{{ prometheus_alertmanager }}'] # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. rule_files: - - "alert.rules.yml" - - "django.rules.yml" + - "alert.rules.yml" # Monitoring alerts, this is the file you may be looking for! + - "django.rules.yml" # Custom rules specific to Django project monitoring # A scrape configuration containing exactly one endpoint to scrape: # Here it's Prometheus itself. 
scrape_configs: - - job_name: dummy - # This reload dynamically the list of targets - # You don't need to restart Prometheus when updating targets.json + # The .json in file_sd_configs is dynamically reloaded + + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + - job_name: servers file_sd_configs: - files: - '/etc/prometheus/targets.json' + relabel_configs: + # Do not put :9100 in instance name, rather here + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - source_labels: [__param_target] + target_label: __address__ + replacement: '$1:9100' + + - job_name: unifi_snmp + file_sd_configs: + - files: + - '/etc/prometheus/targets_unifi_snmp.json' + metrics_path: /snmp + params: + module: [ubiquiti_unifi] + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: + + - job_name: django + scheme: https + static_configs: + - targets: [] diff --git a/roles/prometheus/templates/prometheus/snmp.yml.j2 b/roles/prometheus/templates/prometheus/snmp.yml.j2 new file mode 100644 index 0000000..84dcb65 --- /dev/null +++ b/roles/prometheus/templates/prometheus/snmp.yml.j2 @@ -0,0 +1,297 @@ +# {{ ansible_managed }} +# TODOlist : +# - Faire fonctionner le monitoring des switchs défini ici +# * Configurer tous les switchs avec un compte SNMPv3 +# * Mettre l'inventaire des switchs dans Ansible +# - Optimiser les règles pour les bornes Unifi, +# on pourrait indexer avec les SSID + +procurve_switch: + walk: + - + - + get: + - + - + - + metrics: + - name: sysUpTime + oid: + type: gauge + help: The time (in hundredths of a second) since the network management portion + of the system was last re-initialized. 
- + - name: sysName + oid: + type: DisplayString + help: An administratively-assigned name for this managed node - + - name: sysLocation + oid: + type: DisplayString + help: The physical location of this node (e.g., 'telephone closet, 3rd floor') + - + - name: ifHCOutOctets + oid: + type: counter + help: The total number of octets transmitted out of the interface, including framing + characters - + indexes: + - labelname: ifIndex + type: gauge + - name: ifHCInOctets + oid: + type: counter + help: The total number of octets received on the interface, including framing + characters - + indexes: + - labelname: ifIndex + type: gauge + version: 3 + auth: + username: prometheus + +ubiquiti_unifi: + walk: + - + get: + - + - + metrics: +# Pour faire une WifiMap un jour, on peut entrer la location dans la conf des bornes +# - name: sysLocation +# oid: +# type: DisplayString +# help: The physical location of this node (e.g., 'telephone closet, 3rd floor') +# - + - name: unifiVapIndex + oid: + type: gauge + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapChannel + oid: + type: gauge + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapEssId + oid: + type: DisplayString + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapName + oid: + type: DisplayString + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapNumStations + oid: + type: gauge + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapRadio + oid: + type: DisplayString + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapRxBytes + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapRxCrypts + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapRxDropped + oid: + type: counter + help: ' -' + indexes: + - labelname: 
unifiVapIndex + type: gauge + - name: unifiVapRxErrors + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapRxFrags + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapRxPackets + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapTxBytes + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapTxDropped + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapTxErrors + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapTxPackets + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapTxRetries + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapTxPower + oid: + type: gauge + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapUp + oid: + type: gauge + help: ' -' + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiVapUsage + oid: + type: DisplayString + help: guest or regular user - + indexes: + - labelname: unifiVapIndex + type: gauge + - name: unifiIfIndex + oid: + type: gauge + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfName + oid: + type: DisplayString + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfRxBytes + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfRxDropped + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfRxError + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfRxMulticast + oid: + type: counter + help: ' -' + indexes: + - labelname: 
unifiIfIndex + type: gauge + - name: unifiIfRxPackets + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfTxBytes + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfTxDropped + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfTxError + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiIfTxPackets + oid: + type: counter + help: ' -' + indexes: + - labelname: unifiIfIndex + type: gauge + - name: unifiApSystemModel + oid: + type: DisplayString + help: ' -' + - name: unifiApSystemUptime + oid: + type: counter + help: ' -' + version: 3 + auth: + security_level: authPriv + username: snmp_prometheus + password: {{ snmp_unifi_password }} + auth_protocol: SHA + priv_protocol: AES + priv_password: {{ snmp_unifi_password }} diff --git a/roles/proxmox-vm/tasks/main.yml b/roles/proxmox-vm/tasks/main.yml deleted file mode 100644 index be94272..0000000 --- a/roles/proxmox-vm/tasks/main.yml +++ /dev/null @@ -1,17 +0,0 @@ ---- -- name: Define a virtual machine in Proxmox - proxmox_kvm: - api_user: "{{ ansible_user_id }}@pam" - api_password: "{{ password }}" - api_host: "{{ item.virtu }}.adm.auro.re" - name: "{{ item.name }}" - node: "{{ item.virtu }}" - scsihw: virtio-scsi-pci - scsi: '{"scsi0":"{{ item.virtu }}:{{ item.disksize }},format=raw"}' - sata: '{"sata0":"local:iso/{{ item.installiso }},media=cdrom"}' - net: '{"net0":"virtio,bridge=vmbr2"}' # Adm only by default - cores: "{{ item.cores }}" - memory: "{{ item.memory }}" - balloon: "{{ item.memory // 2 }}" - bios: seabios # Ansible module doesn't support UEFI boot disk - loop: "{{ vm_definitions }}"