prometheus: use humanize/humanizePercentage
This commit is contained in:
parent
19953b2951
commit
c2f2c03af6
2 changed files with 38 additions and 37 deletions
|
@ -7,29 +7,29 @@ prometheus__scraping_node:
|
||||||
prometheus__rules_node:
|
prometheus__rules_node:
|
||||||
- alert: OutOfMemory
|
- alert: OutOfMemory
|
||||||
expr:
|
expr:
|
||||||
100 * (
|
(
|
||||||
node_memory_MemFree_bytes
|
node_memory_MemFree_bytes
|
||||||
+ node_memory_Cached_bytes
|
+ node_memory_Cached_bytes
|
||||||
+ node_memory_Buffers_bytes
|
+ node_memory_Buffers_bytes
|
||||||
) / node_memory_MemTotal_bytes < 10
|
) / node_memory_MemTotal_bytes < 0.1
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
FreeMemory: !unsafe '{{ printf "%.0f" $value }} %'
|
FreeMemory: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
- alert: HostSwapIsFillingUp
|
- alert: HostSwapIsFillingUp
|
||||||
expr:
|
expr:
|
||||||
100 * (
|
(
|
||||||
1 - (
|
1 - (
|
||||||
node_memory_SwapFree_bytes
|
node_memory_SwapFree_bytes
|
||||||
/ node_memory_SwapTotal_bytes
|
/ node_memory_SwapTotal_bytes
|
||||||
)
|
)
|
||||||
) >= 50
|
) >= 0.5
|
||||||
for: 3m
|
for: 3m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
UsedSwap: !unsafe '{{ printf "%.0f" $value }} %'
|
UsedSwap: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
- alert: HostPhysicalComponentTooHot
|
- alert: HostPhysicalComponentTooHot
|
||||||
expr:
|
expr:
|
||||||
node_hwmon_temp_celsius > 79
|
node_hwmon_temp_celsius > 79
|
||||||
|
@ -37,7 +37,7 @@ prometheus__rules_node:
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
Temperature: !unsafe '{{ printf "%.0f" $value }} °C'
|
Temperature: !unsafe "{{ $value | humanize }} °C"
|
||||||
Chip: !unsafe "{{ $labels.chip }}"
|
Chip: !unsafe "{{ $labels.chip }}"
|
||||||
Sensor: !unsafe "{{ $labels.sensor }}"
|
Sensor: !unsafe "{{ $labels.sensor }}"
|
||||||
- alert: HostNodeOvertemperatureAlarm
|
- alert: HostNodeOvertemperatureAlarm
|
||||||
|
@ -92,8 +92,8 @@ prometheus__rules_node:
|
||||||
- alert: OutOfDiskSpace
|
- alert: OutOfDiskSpace
|
||||||
expr:
|
expr:
|
||||||
(
|
(
|
||||||
100 * node_filesystem_free_bytes
|
node_filesystem_free_bytes
|
||||||
/ node_filesystem_size_bytes < 10
|
/ node_filesystem_size_bytes < 0.1
|
||||||
)
|
)
|
||||||
and on (instance, device, mountpoint) (
|
and on (instance, device, mountpoint) (
|
||||||
node_filesystem_readonly
|
node_filesystem_readonly
|
||||||
|
@ -102,19 +102,19 @@ prometheus__rules_node:
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
Mountpoint: !unsafe '{{ $labels.mountpoint }}'
|
Mountpoint: !unsafe "{{ $labels.mountpoint }}"
|
||||||
FreeSpace: !unsafe '{{ printf "%.0f" $value }} %'
|
FreeSpace: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
- alert: HostConntrackLimit
|
- alert: HostConntrackLimit
|
||||||
expr:
|
expr:
|
||||||
100 * (
|
(
|
||||||
node_nf_conntrack_entries
|
node_nf_conntrack_entries
|
||||||
/ node_nf_conntrack_entries_limit
|
/ node_nf_conntrack_entries_limit
|
||||||
) > 80
|
) > 0.8
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
Filled: !unsafe '{{ printf "%.0f" $value }} %'
|
Filled: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
- alert: HostClockSkew
|
- alert: HostClockSkew
|
||||||
expr:
|
expr:
|
||||||
(
|
(
|
||||||
|
@ -142,26 +142,26 @@ prometheus__rules_node:
|
||||||
severity: warning
|
severity: warning
|
||||||
- alert: OutOfInodes
|
- alert: OutOfInodes
|
||||||
expr:
|
expr:
|
||||||
100 * node_filesystem_files_free
|
node_filesystem_files_free
|
||||||
/ node_filesystem_files < 10
|
/ node_filesystem_files < 0.1
|
||||||
for: 3m
|
for: 3m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
Mountpoint: !unsafe "{{ $labels.mountpoint }}"
|
Mountpoint: !unsafe "{{ $labels.mountpoint }}"
|
||||||
FreeInodes: !unsafe '{{ printf "%.0f" $value }} %'
|
FreeInodes: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
- alert: CpuUsage
|
- alert: CpuUsage
|
||||||
expr:
|
expr:
|
||||||
100 * (
|
(
|
||||||
1 - avg by (instance) (
|
1 - avg by (instance) (
|
||||||
irate(node_cpu_seconds_total{mode="idle"}[5m])
|
irate(node_cpu_seconds_total{mode="idle"}[5m])
|
||||||
)
|
)
|
||||||
) > 75
|
) > 0.75
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
Usage: !unsafe '{{ printf "%.0f" $value }} %'
|
Usage: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
- alert: SystemdServiceFailed
|
- alert: SystemdServiceFailed
|
||||||
expr:
|
expr:
|
||||||
node_systemd_unit_state{state="failed"} == 1
|
node_systemd_unit_state{state="failed"} == 1
|
||||||
|
@ -177,7 +177,7 @@ prometheus__rules_node:
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
Load1: !unsafe '{{ printf "%.0f" $value }}'
|
Load1: !unsafe "{{ $value | humanize }}"
|
||||||
- alert: UnhealthyDisk
|
- alert: UnhealthyDisk
|
||||||
expr:
|
expr:
|
||||||
smartmon_device_smart_healthy < 1
|
smartmon_device_smart_healthy < 1
|
||||||
|
@ -188,12 +188,13 @@ prometheus__rules_node:
|
||||||
Disk: !unsafe "{{ $labels.disk }}"
|
Disk: !unsafe "{{ $labels.disk }}"
|
||||||
- alert: HostCpuStealNoisyNeighbor
|
- alert: HostCpuStealNoisyNeighbor
|
||||||
expr:
|
expr:
|
||||||
100 * avg by (instance) (
|
avg by (instance) (
|
||||||
rate(node_cpu_seconds_total{mode="steal"}[5m])
|
rate(node_cpu_seconds_total{mode="steal"}[5m])
|
||||||
) > 10
|
) > 0.1
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
Disk: !unsafe "{{ $labels.disk }}"
|
Disk: !unsafe "{{ $labels.disk }}"
|
||||||
|
Steal: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
...
|
...
|
||||||
|
|
|
@ -20,72 +20,72 @@ prometheus__rules_switch:
|
||||||
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
||||||
- alert: SwitchInErrors
|
- alert: SwitchInErrors
|
||||||
expr:
|
expr:
|
||||||
100 * irate(ifInErrors[5m]) / (
|
irate(ifInErrors[5m]) / (
|
||||||
irate(ifInUcastPkts[5m])
|
irate(ifInUcastPkts[5m])
|
||||||
+ irate(ifInNUcastPkts[5m])
|
+ irate(ifInNUcastPkts[5m])
|
||||||
) > 0.01
|
) > 0.0001
|
||||||
for: 0m
|
for: 0m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
ErrorRate: !unsafe '{{ printf "%.2f" $value }} %'
|
ErrorRate: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
Interface: !unsafe "{{ $labels.ifName }}
|
Interface: !unsafe "{{ $labels.ifName }}
|
||||||
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
||||||
- alert: SwitchOutErrors
|
- alert: SwitchOutErrors
|
||||||
expr:
|
expr:
|
||||||
100 * irate(ifOutErrors[5m]) / (
|
irate(ifOutErrors[5m]) / (
|
||||||
irate(ifOutUcastPkts[5m])
|
irate(ifOutUcastPkts[5m])
|
||||||
+ irate(ifOutNUcastPkts[5m])
|
+ irate(ifOutNUcastPkts[5m])
|
||||||
) > 0.01
|
) > 0.0001
|
||||||
for: 0m
|
for: 0m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
ErrorRate: !unsafe '{{ printf "%.2f" $value }} %'
|
ErrorRate: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
Interface: !unsafe "{{ $labels.ifName }}
|
Interface: !unsafe "{{ $labels.ifName }}
|
||||||
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
||||||
- alert: SwitchInLinkUsage
|
- alert: SwitchInLinkUsage
|
||||||
expr:
|
expr:
|
||||||
100 * rate(ifHCInOctets[5m]) / (ifHighSpeed * 1000000 / 8) > 50
|
rate(ifHCInOctets[5m]) / (ifHighSpeed * 1000000 / 8) > 0.5
|
||||||
for: 5m
|
for: 5m
|
||||||
keep_firing_for: 10m
|
keep_firing_for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
Usage: !unsafe '{{ printf "%.2f" $value }} %'
|
Usage: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
Interface: !unsafe "{{ $labels.ifName }}
|
Interface: !unsafe "{{ $labels.ifName }}
|
||||||
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
||||||
- alert: SwitchInLinkUsage
|
- alert: SwitchInLinkUsage
|
||||||
expr:
|
expr:
|
||||||
100 * rate(ifHCInOctets[5m]) / (ifHighSpeed * 1000000 / 8) > 80
|
rate(ifHCInOctets[5m]) / (ifHighSpeed * 1000000 / 8) > 0.8
|
||||||
for: 5m
|
for: 5m
|
||||||
keep_firing_for: 10m
|
keep_firing_for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
Usage: !unsafe '{{ printf "%.2f" $value }} %'
|
Usage: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
Interface: !unsafe "{{ $labels.ifName }}
|
Interface: !unsafe "{{ $labels.ifName }}
|
||||||
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
||||||
- alert: SwitchOutLinkUsage
|
- alert: SwitchOutLinkUsage
|
||||||
expr:
|
expr:
|
||||||
100 * rate(ifHCOutOctets[5m]) / (ifHighSpeed * 1000000 / 8) > 50
|
rate(ifHCOutOctets[5m]) / (ifHighSpeed * 1000000 / 8) > 0.5
|
||||||
for: 5m
|
for: 5m
|
||||||
keep_firing_for: 10m
|
keep_firing_for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
Usage: !unsafe '{{ printf "%.2f" $value }} %'
|
Usage: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
Interface: !unsafe "{{ $labels.ifName }}
|
Interface: !unsafe "{{ $labels.ifName }}
|
||||||
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
||||||
- alert: SwitchOutLinkUsage
|
- alert: SwitchOutLinkUsage
|
||||||
expr:
|
expr:
|
||||||
100 * rate(ifHCOutOctets[5m]) / (ifHighSpeed * 1000000 / 8) > 80
|
rate(ifHCOutOctets[5m]) / (ifHighSpeed * 1000000 / 8) > 0.8
|
||||||
for: 5m
|
for: 5m
|
||||||
keep_firing_for: 10m
|
keep_firing_for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
Usage: !unsafe '{{ printf "%.2f" $value }} %'
|
Usage: !unsafe "{{ $value | humanizePercentage }}"
|
||||||
Interface: !unsafe "{{ $labels.ifName }}
|
Interface: !unsafe "{{ $labels.ifName }}
|
||||||
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
{{ if $labels.ifAlias }}- {{ $labels.ifAlias }}{{ end }}"
|
||||||
...
|
...
|
||||||
|
|
Loading…
Reference in a new issue