Route PodCrashLoopBackOff and PodOOMKilled alerts to Slack (CrashLoopBackOff events + OOMKiller)

This commit is contained in:
Seth Call 2026-01-06 06:30:31 -06:00
parent 9438c9f57d
commit 0277a790b6
2 changed files with 42 additions and 2 deletions

View File

@@ -161,6 +161,12 @@ kube-prometheus-stack:
- match: - match:
alertname: WebrtcBeError alertname: WebrtcBeError
receiver: 'email-and-slack-notifications' receiver: 'email-and-slack-notifications'
- match:
alertname: PodOOMKilled
receiver: 'slack-notifications-oom'
- match:
alertname: PodCrashLoopBackOff
receiver: 'slack-notifications'
receivers: receivers:
- name: 'null' - name: 'null'
- name: 'email-alerts' - name: 'email-alerts'
@@ -172,7 +178,21 @@ kube-prometheus-stack:
- api_url: 'https://hooks.slack.com/services/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' - api_url: 'https://hooks.slack.com/services/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'
channel: '#video-cluster-prd-alerts' channel: '#video-cluster-prd-alerts'
send_resolved: true send_resolved: true
title: '[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification' title: '[PRODUCTION] [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
text: >-
{{ range .Alerts }}
*Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
*Description:* {{ .Annotations.description }}
*Details:*
{{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
{{ end }}
{{ end }}
- name: 'slack-notifications-oom'
slack_configs:
- api_url: 'https://hooks.slack.com/services/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'
channel: '#video-cluster-prd-alerts'
send_resolved: false
title: '[PRODUCTION] [OOM KILLED] Monitoring Event Notification'
text: >- text: >-
{{ range .Alerts }} {{ range .Alerts }}
*Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}` *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`

View File

@@ -172,6 +172,12 @@ kube-prometheus-stack:
- match: - match:
alertname: WebrtcBeError alertname: WebrtcBeError
receiver: 'email-and-slack-notifications' receiver: 'email-and-slack-notifications'
- match:
alertname: PodOOMKilled
receiver: 'slack-notifications-oom'
- match:
alertname: PodCrashLoopBackOff
receiver: 'slack-notifications'
receivers: receivers:
- name: 'null' - name: 'null'
- name: 'email-alerts' - name: 'email-alerts'
@@ -183,7 +189,21 @@ kube-prometheus-stack:
- api_url: 'https://hooks.slack.com/services/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' - api_url: 'https://hooks.slack.com/services/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'
channel: '#video-cluster-staging-alerts' channel: '#video-cluster-staging-alerts'
send_resolved: true send_resolved: true
title: '[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification' title: '[STAGING] [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
text: >-
{{ range .Alerts }}
*Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
*Description:* {{ .Annotations.description }}
*Details:*
{{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
{{ end }}
{{ end }}
- name: 'slack-notifications-oom'
slack_configs:
- api_url: 'https://hooks.slack.com/services/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'
channel: '#video-cluster-staging-alerts'
send_resolved: false
title: '[STAGING] [OOM KILLED] Monitoring Event Notification'
text: >- text: >-
{{ range .Alerts }} {{ range .Alerts }}
*Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}` *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`