# video-iac/k8s/monitoring/templates/pod-alerts.yaml (Helm template, YAML)

# PrometheusRule (Prometheus Operator CRD) with pod-level alerts:
#   - PodOOMKilled:        a container was terminated with reason OOMKilled.
#   - PodCrashLoopBackOff: a container has been waiting in CrashLoopBackOff
#                          for at least 2 minutes.
# The kube_pod_container_status_* series queried below are the ones
# documented by kube-state-metrics.
#
# Helm renders this file first, so the Prometheus template expressions in the
# annotations have their opening and closing double braces emitted through
# quoted Helm string literals; written bare, Helm would try to evaluate
# $labels itself and fail.
#
# NOTE(review): confirm that the labels below match the ruleSelector of the
# target Prometheus instance (kube-prometheus-stack commonly selects on a
# "release" label) - otherwise this rule is silently ignored.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cluster-pod-alerts
  labels:
    app: kube-prometheus-stack
    # Quoted so that a release name such as "123" or "true" still renders as
    # a YAML string; Kubernetes rejects non-string label values.
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
spec:
  groups:
    - name: pod.alerts
      rules:
        - alert: PodOOMKilled
          # Two branches:
          #  1. The container is currently in the Terminated state with
          #     reason OOMKilled (original condition; covers containers that
          #     are not restarted).
          #  2. The container restarted within the last 10 minutes and its
          #     last termination reason is OOMKilled. A container that is
          #     restarted automatically leaves the Terminated state within
          #     seconds, so branch 1 alone is usually missed between scrapes.
          # "ignoring (reason)" is required because the restart counter has
          # no reason label. Branch 2 keeps the alert firing for up to 10
          # minutes after the restart.
          expr: |-
            kube_pod_container_status_terminated_reason{reason="OOMKilled"} > 0
            or
            (
              increase(kube_pod_container_status_restarts_total[10m]) > 0
              and ignoring (reason)
              kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} == 1
            )
          # Fire on the first evaluation that matches; no pending period.
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Pod OOMKilled"
            description: "Container {{ "{{" }} $labels.container {{ "}}" }} in pod {{ "{{" }} $labels.pod {{ "}}" }} (namespace {{ "{{" }} $labels.namespace {{ "}}" }}) was OOMKilled."
        - alert: PodCrashLoopBackOff
          expr: kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff"} > 0
          # Must hold for 2 minutes before firing, which filters out a
          # container that recovers on its own.
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Pod in CrashLoopBackOff"
            description: "Pod {{ "{{" }} $labels.pod {{ "}}" }} in namespace {{ "{{" }} $labels.namespace {{ "}}" }} is in CrashLoopBackOff."