# PrometheusRule (Prometheus Operator CRD) rendered through Helm.
# Defines pod-level alerts for OOM-killed containers and crash-looping pods.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cluster-pod-alerts
  labels:
    # NOTE(review): presumably this label is what the Prometheus ruleSelector
    # matches on - confirm against the Prometheus resource in the cluster.
    app: kube-prometheus-stack
    # Quoted so that a release name which looks like a number or a boolean
    # still renders as a YAML string; label values must be strings.
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
spec:
  groups:
    - name: pod.alerts
      rules:
        # The description annotations below wrap the Prometheus template
        # delimiters in Helm string literals, so Helm emits them verbatim and
        # Prometheus expands the label references when the alert fires.

        # Fires when a container reports a terminated state with reason
        # OOMKilled.
        # NOTE(review): this series appears to be non-zero only while the
        # container is currently terminated, so a fast restart between scrapes
        # may be missed. Consider the last_terminated_reason variant combined
        # with a restart-count increase - confirm against kube-state-metrics.
        - alert: PodOOMKilled
          expr: 'kube_pod_container_status_terminated_reason{reason="OOMKilled"} > 0'
          # No pending period: the alert fires on the first matching evaluation.
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Pod OOMKilled"
            description: "Container {{ "{{" }} $labels.container {{ "}}" }} in pod {{ "{{" }} $labels.pod {{ "}}" }} (namespace {{ "{{" }} $labels.namespace {{ "}}" }}) was OOMKilled."

        # Fires when a container has been waiting with reason CrashLoopBackOff
        # for at least two minutes.
        - alert: PodCrashLoopBackOff
          expr: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff"} > 0'
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Pod in CrashLoopBackOff"
            description: "Pod {{ "{{" }} $labels.pod {{ "}}" }} in namespace {{ "{{" }} $labels.namespace {{ "}}" }} is in CrashLoopBackOff."