Patch summary (free-text preamble; skipped by `git apply` and `patch`):
  * WebrtcBeCrashed: the `for: 1m` hold is replaced by a comment, so the rule
    has no pending period and fires as soon as its expression matches.
  * WebrtcBeDown (new): critical alert when the webrtc-be deployment in the
    webrtc-be namespace reports 0 available replicas, held for 1 minute.
NOTE(review): line breaks and YAML indentation were reconstructed from a
whitespace-collapsed copy of this patch; confirm the context-line indentation
against alerts.yaml before applying.

diff --git a/k8s/webrtc-be/templates/alerts.yaml b/k8s/webrtc-be/templates/alerts.yaml
index 8f66a1f..092a16a 100644
--- a/k8s/webrtc-be/templates/alerts.yaml
+++ b/k8s/webrtc-be/templates/alerts.yaml
@@ -21,7 +21,7 @@ spec:
 
         - alert: WebrtcBeCrashed
           expr: increase(kube_pod_container_status_restarts_total{container="webrtc-be", namespace="webrtc-be"}[5m]) > 0
-          for: 1m
+          # Instant alert - no 'for' duration
           labels:
             severity: critical
           annotations:
@@ -29,3 +29,14 @@ spec:
             description: "The webrtc-be pod has crashed. Please check the logs."
             loki_link: >-
               {{ .Values.grafana.externalUrl }}/explore?orgId=1&left=["now-1h","now","Loki",{"expr":"{container=\"webrtc-be\", namespace=\"webrtc-be\"}"}]
+
+        - alert: WebrtcBeDown
+          expr: kube_deployment_status_replicas_available{deployment="webrtc-be", namespace="webrtc-be"} == 0
+          for: 1m
+          labels:
+            severity: critical
+          annotations:
+            summary: "webrtc-be is down"
+            description: "The webrtc-be service has been unavailable for more than 1 minute."
+            loki_link: >-
+              {{ .Values.grafana.externalUrl }}/explore?orgId=1&left=["now-1h","now","Loki",{"expr":"{container=\"webrtc-be\", namespace=\"webrtc-be\"}"}]