29 lines
1003 B
YAML
29 lines
1003 B
YAML
# Helm template for a Prometheus Operator PrometheusRule that defines
# pod-level alerts (OOM kills and CrashLoopBackOff).
#
# NOTE: Helm evaluates template actions even inside YAML comments, so do not
# write double-curly-brace expressions in comments in this file.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cluster-pod-alerts
  labels:
    # Presumably matched by the Prometheus ruleSelector of the
    # kube-prometheus-stack release -- TODO confirm against the Prometheus CR.
    app: kube-prometheus-stack
    # Quoted so release names that look like booleans or numbers
    # (e.g. "true", "no", "123") still render as string label values.
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
spec:
  groups:
    - name: pod.alerts
      rules:
        # Fires as soon as a container reports OOMKilled as its terminated
        # reason (no pending period).
        # NOTE(review): this metric appears to reflect only the *current*
        # terminated state, so a container that restarts quickly may be
        # missed; consider the "last_terminated_reason" variant -- confirm
        # against the kube-state-metrics version in use.
        - alert: PodOOMKilled
          expr: kube_pod_container_status_terminated_reason{reason="OOMKilled"} > 0
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Pod OOMKilled"
            # The quoted-brace actions make Helm emit literal Prometheus
            # template delimiters, so $labels is expanded by Prometheus at
            # alert time rather than by Helm at render time.
            description: "Container {{ "{{" }} $labels.container {{ "}}" }} in pod {{ "{{" }} $labels.pod {{ "}}" }} (namespace {{ "{{" }} $labels.namespace {{ "}}" }}) was OOMKilled."

        # Fires when a container has been waiting in CrashLoopBackOff for at
        # least two minutes.
        - alert: PodCrashLoopBackOff
          expr: kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff"} > 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Pod in CrashLoopBackOff"
            # Same Helm escaping as above: delimiters are emitted literally
            # for Prometheus to expand.
            description: "Pod {{ "{{" }} $labels.pod {{ "}}" }} in namespace {{ "{{" }} $labels.namespace {{ "}}" }} is in CrashLoopBackOff."