Test alert
This commit is contained in:
parent
9a6b86f96e
commit
9a0303cbb5
|
|
@ -9,7 +9,26 @@ spec:
|
|||
groups:
|
||||
- name: node.alerts
|
||||
rules:
|
||||
- alert: NodeHighCPU
|
||||
# Synthetic alert used to exercise the alert delivery path end to end.
# `vector(1)` always returns a sample, so the expression is permanently true,
# and `for: 0m` sets no pending period before the alert fires.
# The Alertmanager routes in this same change send alertname InternalTestAlert
# to the 'slack-notifications' receiver.
# NOTE(review): this alert never resolves and is labelled critical; presumably
# it is temporary - confirm and remove once the Slack pipeline is verified.
- alert: InternalTestAlert
|
||||
expr: vector(1)
|
||||
for: 0m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Internal Alert Pipeline Test"
|
||||
description: "This alert is manually triggered to verify the Slack alerting pipeline."
|
||||
|
||||
# Flags instances whose node_cpu_seconds_total series carry no non-empty
# `workload` label.
# The left operand yields one series per instance that exports
# node_cpu_seconds_total; `unless` then drops every instance that has at least
# one such series with a workload value matching ".+" (i.e. non-empty).
# Whatever remains is an instance with no workload-labelled CPU series.
# The condition must hold for 5m before the alert fires, at severity warning.
# The CPU rule in this file filters on workload="media", so unlabelled
# instances would not be covered by it.
- alert: NodeMissingWorkloadLabel
|
||||
expr: |
|
||||
count by (instance) (node_cpu_seconds_total) unless count by (instance) (node_cpu_seconds_total{workload=~".+"})
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Node missing workload label on metrics"
|
||||
description: "Metrics for instance {{ "{{" }} $labels.instance {{ "}}" }} are missing the 'workload' label, which is required for NodeHighCPU alerts."
|
||||
|
||||
- alert: MediaNodeHighCPU
|
||||
expr: |
|
||||
(
|
||||
(1 - avg without (cpu, mode) (rate(node_cpu_seconds_total{mode="idle", workload="media"}[1m]))) * 100 > {{ .Values.cpuThresholdMedia | default 65 }}
|
||||
|
|
@ -23,4 +42,4 @@ spec:
|
|||
severity: warning
|
||||
annotations:
|
||||
summary: "High CPU usage on node {{ "{{" }} $labels.instance {{ "}}" }}"
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has CPU usage above threshold (current value: {{ "{{" }} $value | printf \"%.2f\" {{ "}}" }}%)"
|
||||
description: "Node {{ "{{" }} $labels.instance {{ "}}" }} (workload: {{ "{{" }} $labels.workload {{ "}}" }}) has CPU usage above threshold (current value: {{ "{{" }} $value | printf \"%.2f\" {{ "}}" }}%)"
|
||||
|
|
|
|||
|
|
@ -170,6 +170,15 @@ kube-prometheus-stack:
|
|||
repeat_interval: 12h
|
||||
receiver: 'null'
|
||||
routes:
|
||||
# Child routes: alerts named here are delivered to Slack. An alert that matches
# none of them is handled by the parent route, whose receiver is 'null'.
# `matchers` replaces the deprecated `match` map; each entry below is an
# exact-equality match on the alertname label, so routing is unchanged.
- matchers:
    - alertname = "InternalTestAlert"
  receiver: 'slack-notifications'
- matchers:
    - alertname = "MediaNodeHighCPU"
  receiver: 'slack-notifications'
- matchers:
    - alertname = "NodeMissingWorkloadLabel"
  receiver: 'slack-notifications'
# NOTE(review): the rule change alongside this one introduces MediaNodeHighCPU;
# confirm a rule named NodeHighCPU still exists, otherwise this route is dead.
- matchers:
    - alertname = "NodeHighCPU"
  receiver: 'slack-notifications'
|
||||
|
|
|
|||
Loading…
Reference in New Issue