---
# Scrape definition for the hosts in the `quanta` inventory group.
# NOTE(review): address/path/module look like a local snmp_exporter
# endpoint using its `quanta` module - confirm against the consuming role.
prometheus__scraping_quanta:
  targets: "{{ groups.quanta }}"
  address: 127.0.0.1:9116
  path: /snmp
  timeout: 60s
  params:
    module:
      - quanta

# Alerting rules for the metrics scraped above.
# Rules that share an alert name form warning/critical tiers of the same
# condition. Annotation values are tagged `!unsafe` so the
# `{{ ... }}` placeholders are not templated by Ansible and reach
# Prometheus unchanged.
prometheus__rules_quanta:
  - alert: QuantaQueueOverflow
    expr: snAgGblQueueOverflow == 1
    for: 0m
    labels:
      severity: critical

  # CPU utilisation (1 minute average): warning above 50, critical above 80.
  - alert: QuantaCpuUsage
    expr: snAgGblCpuUtil1MinAvg > 50
    for: 5m
    labels:
      severity: warning
    annotations:
      Usage: !unsafe "{{ $value }} %"

  - alert: QuantaCpuUsage
    expr: snAgGblCpuUtil1MinAvg > 80
    for: 5m
    labels:
      severity: critical
    annotations:
      Usage: !unsafe "{{ $value }} %"

  # Used dynamic memory in percent, derived from the free/total gauges.
  - alert: QuantaMemoryUsage
    expr: 100 * (1 - (snAgGblDynMemFree / snAgGblDynMemTotal)) > 50
    for: 5m
    labels:
      severity: warning
    annotations:
      UsedMemory: !unsafe "{{ $value }} %"

  - alert: QuantaMemoryUsage
    expr: 100 * (1 - (snAgGblDynMemFree / snAgGblDynMemTotal)) > 80
    for: 5m
    labels:
      # Was `alert`; every other rule in this file uses warning/critical,
      # so the upper tier is aligned to `critical`.
      severity: critical
    annotations:
      UsedMemory: !unsafe "{{ $value }} %"

  # Fires when the "normal" state series of a fan has value 0.
  # NOTE(review): because the selector pins the state label to "normal",
  # the Status annotation will always render "normal" - confirm whether
  # the actual state should be shown instead.
  - alert: QuantaFanHealth
    expr: snChasFanOperStatus{snChasFanOperStatus="normal"} == 0
    for: 0m
    labels:
      severity: critical
    annotations:
      # Label name fixed: was `shChasFanDescription` (typo for `sn...`),
      # which would have rendered as an empty string.
      Description: !unsafe "{{ $labels.snChasFanDescription }}"
      Status: !unsafe "{{ $labels.snChasFanOperStatus }}"

  # Fires for instances that report temperature sensors but none whose
  # description matches "Intake". The previous form
  # (`count(all) - count(intake) == 0`) returned nothing when no Intake
  # series existed, because the right-hand side of the subtraction was
  # empty; `unless` keeps exactly the instances without a match.
  - alert: QuantaMissingIntakeTemp
    expr: >-
      count by (instance) ( snAgentTempValue )
      unless
      count by (instance) (
      snAgentTempValue{snAgentTempSensorDescr=~".*Intake.*"}
      )
    for: 0m
    labels:
      severity: critical

  # Intake temperature: warning above 60, critical above 70.
  # NOTE(review): the 0.5 factor presumably converts half-degree units to
  # degrees Celsius - confirm against the MIB.
  - alert: QuantaIntakeTemp
    expr: 0.5 * snAgentTempValue{snAgentTempSensorDescr=~".*Intake.*"} > 60
    for: 10m
    keep_firing_for: 30m
    labels:
      severity: warning
    annotations:
      Temperature: !unsafe "{{ $value }} °C"
      Description: !unsafe "{{ $labels.snAgentTempSensorDescr }}"

  - alert: QuantaIntakeTemp
    expr: 0.5 * snAgentTempValue{snAgentTempSensorDescr=~".*Intake.*"} > 70
    for: 10m
    keep_firing_for: 30m
    labels:
      severity: critical
    annotations:
      Temperature: !unsafe "{{ $value }} °C"
      Description: !unsafe "{{ $labels.snAgentTempSensorDescr }}"

  # Fires when fewer than two power supplies are in the "normal" state.
  # Uses `sum` rather than `count`: the fan rule above implies the
  # "normal" series exists with value 0 when the state is not normal, so
  # counting series would count supplies, not healthy supplies. If the
  # series value is always 1, `sum` and `count` give the same result.
  - alert: QuantaPowerRedundancyFailure
    expr: >-
      sum by (instance) (
      snChasPwrSupplyOperStatus{snChasPwrSupplyOperStatus="normal"}
      ) < 2
    for: 0m
    labels:
      severity: warning
...