Merge pull request 'Prometheus: cleanup' (#90) from prometheus_cleanup into master
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
Reviewed-on: #90pull/92/head
commit
4a3ba6f366
@ -1,106 +0,0 @@
|
||||
# {{ ansible_managed }}
|
||||
{# As this is also Jinja2 it will conflict without a raw block #}
|
||||
{% raw %}
|
||||
groups:
|
||||
- name: django.rules
|
||||
rules:
|
||||
- record: job:django_http_requests_before_middlewares_total:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_before_middlewares_total[30s])) BY (job)
|
||||
- record: job:django_http_requests_unknown_latency_total:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_unknown_latency_total[30s])) BY (job)
|
||||
- record: job:django_http_ajax_requests_total:sum_rate30s
|
||||
expr: sum(rate(django_http_ajax_requests_total[30s])) BY (job)
|
||||
- record: job:django_http_responses_before_middlewares_total:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_before_middlewares_total[30s])) BY (job)
|
||||
- record: job:django_http_requests_unknown_latency_including_middlewares_total:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_unknown_latency_including_middlewares_total[30s]))
|
||||
BY (job)
|
||||
- record: job:django_http_requests_body_total_bytes:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_body_total_bytes[30s])) BY (job)
|
||||
- record: job:django_http_responses_streaming_total:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_streaming_total[30s])) BY (job)
|
||||
- record: job:django_http_responses_body_total_bytes:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_body_total_bytes[30s])) BY (job)
|
||||
- record: job:django_http_requests_total:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_method[30s])) BY (job)
|
||||
- record: job:django_http_requests_total_by_method:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_method[30s])) BY (job, method)
|
||||
- record: job:django_http_requests_total_by_transport:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_transport[30s])) BY (job, transport)
|
||||
- record: job:django_http_requests_total_by_view:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) BY (job,
|
||||
view)
|
||||
- record: job:django_http_requests_total_by_view_transport_method:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) BY (job,
|
||||
view, transport, method)
|
||||
- record: job:django_http_responses_total_by_templatename:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_total_by_templatename[30s])) BY (job, templatename)
|
||||
- record: job:django_http_responses_total_by_status:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_total_by_status[30s])) BY (job, status)
|
||||
- record: job:django_http_responses_total_by_charset:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_total_by_charset[30s])) BY (job, charset)
|
||||
- record: job:django_http_exceptions_total_by_type:sum_rate30s
|
||||
expr: sum(rate(django_http_exceptions_total_by_type[30s])) BY (job, type)
|
||||
- record: job:django_http_exceptions_total_by_view:sum_rate30s
|
||||
expr: sum(rate(django_http_exceptions_total_by_view[30s])) BY (job, view)
|
||||
- record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.5, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "50"
|
||||
- record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "95"
|
||||
- record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "99"
|
||||
- record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "99.9"
|
||||
- record: job:django_http_requests_latency_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.5, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "50"
|
||||
- record: job:django_http_requests_latency_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "95"
|
||||
- record: job:django_http_requests_latency_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "99"
|
||||
- record: job:django_http_requests_latency_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "99.9"
|
||||
- record: job:django_model_inserts_total:sum_rate1m
|
||||
expr: sum(rate(django_model_inserts_total[1m])) BY (job, model)
|
||||
- record: job:django_model_updates_total:sum_rate1m
|
||||
expr: sum(rate(django_model_updates_total[1m])) BY (job, model)
|
||||
- record: job:django_model_deletes_total:sum_rate1m
|
||||
expr: sum(rate(django_model_deletes_total[1m])) BY (job, model)
|
||||
- record: job:django_db_new_connections_total:sum_rate30s
|
||||
expr: sum(rate(django_db_new_connections_total[30s])) BY (alias, vendor)
|
||||
- record: job:django_db_new_connection_errors_total:sum_rate30s
|
||||
expr: sum(rate(django_db_new_connection_errors_total[30s])) BY (alias, vendor)
|
||||
- record: job:django_db_execute_total:sum_rate30s
|
||||
expr: sum(rate(django_db_execute_total[30s])) BY (alias, vendor)
|
||||
- record: job:django_db_execute_many_total:sum_rate30s
|
||||
expr: sum(rate(django_db_execute_many_total[30s])) BY (alias, vendor)
|
||||
- record: job:django_db_errors_total:sum_rate30s
|
||||
expr: sum(rate(django_db_errors_total[30s])) BY (alias, vendor, type)
|
||||
- record: job:django_migrations_applied_total:max
|
||||
expr: max(django_migrations_applied_total) BY (job, connection)
|
||||
- record: job:django_migrations_unapplied_total:max
|
||||
expr: max(django_migrations_unapplied_total) BY (job, connection)
|
||||
{% endraw %}
|
@ -0,0 +1,83 @@
|
||||
---
|
||||
{{ ansible_managed | comment }}
|
||||
|
||||
{% macro raw(string) -%}
|
||||
{{ "{{" }} {{ string }} {{ "}}" }}
|
||||
{%- endmacro %}
|
||||
|
||||
groups:
|
||||
|
||||
- name: ilo.rules
|
||||
rules:
|
||||
|
||||
- alert: IloResilientMemoryDegraded
|
||||
expr: cpqHeResilientMemCondition{cpqHeResilientMemCondition!~"ok|other"} == 1
|
||||
for: 3m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: >-
|
||||
La mémoire vive n'est plus résiliente
|
||||
({{ raw('$labels.cpqHeResilientMemCondition') }})
|
||||
|
||||
- alert: IloBiosSelfTestDegraded
|
||||
expr: cpqHeHWBiosCondition{cpqHeHWBiosCondition!~"ok|other"} == 1
|
||||
for: 3m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: >-
|
||||
Une erreur a été détectée lors du POST du serveur
|
||||
({{ raw('$labels.cpqHeHWBiosCondition') }})
|
||||
|
||||
- alert: IloBatteryDegraded
|
||||
expr: cpqHeSysBatteryCondition{cpqHeSysBatteryCondition!~"ok|other"} == 1
|
||||
for: 3m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: >-
|
||||
La batterie est dégradée
|
||||
({{ raw('$labels.cpqHeSysBatteryCondition') }})
|
||||
|
||||
- alert: IloTemperatureSensorDegraded
|
||||
expr: cpqHeTemperatureCondition{cpqHeTemperatureCondition!~"ok|other"} == 1
|
||||
for: 3m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: >-
|
||||
Le capteur de température est dégradé
|
||||
({{ raw('$labels.cpqHeTemperatureCondition') }})
|
||||
|
||||
- alert: IloFanDegraded
|
||||
expr: cpqHeFltTolFanCondition{cpqHeFltTolFanCondition!~"ok|other"} == 1
|
||||
for: 3m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: >-
|
||||
Le ventilateur est dégradé
|
||||
({{ raw('$labels.cpqHeFltTolFanCondition') }})
|
||||
|
||||
- alert: IloPowerSupplyDegraded
|
||||
expr: cpqHeFltTolPowerSupplyStatus{cpqHeFltTolPowerSupplyStatus!="noError"} == 1
|
||||
for: 3m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: >-
|
||||
L'alimentation est dégradée
|
||||
({{ raw('$labels.cpqHeFltTolPowerSupplyStatus') }})
|
||||
|
||||
- alert: IloOverrideSwitchState
|
||||
expr: cpqSm2CntlriLOSecurityOverrideSwitchState{cpqSm2CntlriLOSecurityOverrideSwitchState="set"} == 1
|
||||
for: 3m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: >-
|
||||
Le switch de réinitialisation n'est pas à l'état d'origine,
|
||||
l'authentification est bypassée
|
||||
|
||||
...
|
Loading…
Reference in New Issue