video-iac/k8s/monitoring/templates/node-alerts.yaml

20 lines
668 B
YAML

# PrometheusRule defining node-level alerts.
# This file is a Helm template: Helm renders it first; the alert annotations
# are then expanded a second time by Prometheus when an alert fires.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cluster-node-alerts
  labels:
    # NOTE(review): presumably this label is what the Prometheus ruleSelector
    # matches on - confirm against the Prometheus custom resource.
    app: kube-prometheus-stack
    # Quoted so that release names which look like numbers, booleans or null
    # (e.g. 123, true) still render as strings; label values must be strings.
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
spec:
  groups:
    - name: node.alerts
      rules:
        # Fires when, per instance, 100 * (1 - average idle CPU rate over 5m)
        # stays above 20 for at least 2 minutes.
        - alert: NodeHighCPU
          expr: (1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100 > 20
          for: 2m
          labels:
            severity: warning
          annotations:
            # The quoted brace pairs below make Helm emit literal double
            # braces, so $labels and $value are resolved by Prometheus at
            # alert time rather than by Helm at render time.
            summary: "High CPU usage on node {{ "{{" }} $labels.instance {{ "}}" }}"
            description: "Node {{ "{{" }} $labels.instance {{ "}}" }} has CPU usage above 20% (current value: {{ "{{" }} $value | printf \"%.2f\" {{ "}}" }}%)"