# Listing metadata from the file viewer: 44 lines, 1.8 KiB, YAML.
# PrometheusRule carrying the critical alerts for the webrtc-be workload.
#
# The `{{ .Values.* }}` / `{{ .Release.* }}` actions below mean this file is a
# template (presumably rendered by Helm - confirm) and is not valid to apply
# verbatim.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: webrtc-be-log-alerts
  labels:
    app: webrtc-be
    # NOTE(review): presumably the label the Prometheus Operator's rule
    # selector matches on - confirm against the stack's ruleSelector.
    release: prometheus-stack
spec:
  groups:
    - name: webrtc-be.alerts
      rules:
        # Disabled log-based alert, kept for reference.
        # NOTE(review): the expression below is LogQL (stream selector plus a
        # `|=` line filter), not PromQL, so it presumably belongs in a Loki
        # ruler rule rather than a PrometheusRule - confirm before re-enabling.
        # NOTE(review): template actions inside YAML comments are still
        # evaluated by the template engine, so the `.Values` lookup in the
        # commented loki_link must resolve even while this rule is disabled.
        # - alert: WebrtcBeError
        #   expr: 'sum(count_over_time({container="webrtc-be", namespace="webrtc-be"} |= "error" [5m])) > 0'
        #   for: 1m
        #   labels:
        #     severity: critical
        #   annotations:
        #     summary: "Errors found in webrtc-be logs"
        #     description: "The webrtc-be container is logging errors. Please check the logs."
        #     loki_link: >-
        #       {{ .Values.grafana.externalUrl }}/explore?orgId=1&left=["now-1h","now","Loki",{"expr":"{container=\"webrtc-be\", namespace=\"webrtc-be\"}"}]

        # Fires when the restart counter of the webrtc-be container grew
        # within the last 5 minutes.
        # NOTE(review): the namespace is hard-coded in the selector while
        # loki_link uses the release namespace; the two disagree if the
        # release is installed into any namespace other than "webrtc-be".
        - alert: WebrtcBeCrashed
          expr: 'increase(kube_pod_container_status_restarts_total{container="webrtc-be", namespace="webrtc-be"}[5m]) > 0'
          # Instant alert: no `for` duration, so it fires on the first
          # evaluation where the expression returns a result.
          labels:
            severity: critical
          annotations:
            summary: "webrtc-be crashed"
            description: "The webrtc-be pod has crashed. Please check the logs."
            # Folded scalar (`>-`) keeps the long URL as one line with no
            # trailing newline.
            loki_link: >-
              {{ .Values.grafana.externalUrl }}/grafana/d/loki-logs-fixed-v10/loki-logs-fixed-v10?var-namespace={{ .Release.Namespace }}&var-container=webrtc-be&var-logs=loki&var-level=$__all

        # Fires when the webrtc-be deployment has reported zero available
        # replicas continuously for 1 minute.
        # NOTE(review): same hard-coded namespace vs. release namespace
        # mismatch as in WebrtcBeCrashed.
        - alert: WebrtcBeDown
          expr: 'kube_deployment_status_replicas_available{deployment="webrtc-be", namespace="webrtc-be"} == 0'
          for: 1m
          labels:
            severity: critical
          annotations:
            summary: "webrtc-be is down"
            description: "The webrtc-be service has been unavailable for more than 1 minute."
            loki_link: >-
              {{ .Values.grafana.externalUrl }}/grafana/d/loki-logs-fixed-v10/loki-logs-fixed-v10?var-namespace={{ .Release.Namespace }}&var-container=webrtc-be&var-logs=loki&var-level=$__all