diff --git a/k8s/monitoring/README.md b/k8s/monitoring/README.md
new file mode 100644
index 0000000..d7d737a
--- /dev/null
+++ b/k8s/monitoring/README.md
@@ -0,0 +1,19 @@
+# Monitoring and Alerting
+
+## Slack Webhook Configuration
+
+The Slack notifications use a specific Incoming Webhook URL structure:
+`https://hooks.slack.com/services/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`
+
+These ID components represent:
+- **T0L5RA3E0**: Slack Workspace ID (e.g., JamKazam)
+- **B01SM8RC346**: Bot/App Configuration ID (unique to the specific "Incoming Webhook" integration created in the Slack app management)
+- **XDDOrcPE7eAXJPMCvc5FxIva**: The Secret Token for authentication. Treat the full webhook URL as a secret — anyone who has it can post to the channel — so avoid committing it to public repositories and rotate it if it leaks.
+
+### Updating the Webhook
+If you need to change the channel or regenerate the URL:
+1. Go to [Slack App Management](https://api.slack.com/apps).
+2. Select the relevant App (e.g., "Monitoring" or "Incoming Webhooks").
+3. Navigate to **Incoming Webhooks**.
+4. Generate a new Webhook URL for the desired channel.
+5. Update the URL in `values-production.yaml` and `values-staging.yaml`.
diff --git a/k8s/monitoring/values-production.yaml b/k8s/monitoring/values-production.yaml
index 6aa6af8..c84dbab 100644
--- a/k8s/monitoring/values-production.yaml
+++ b/k8s/monitoring/values-production.yaml
@@ -148,15 +148,58 @@ kube-prometheus-stack:
repeat_interval: 12h
receiver: 'null'
routes:
- - match:
+ - match:
alertname: WebrtcBeCrashed
- receiver: 'email-alerts'
+ receiver: 'email-and-slack-notifications'
+ - match:
+ alertname: WebrtcBeError
+ receiver: 'email-and-slack-notifications'
receivers:
- name: 'null'
- name: 'email-alerts'
email_configs:
- to: 'alerts@jamkazam.com'
send_resolved: true
+ - name: 'slack-notifications'
+ slack_configs:
+ - api_url: 'https://hooks.slack.com/services/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'
+ channel: '#monitoring-alerts'
+ send_resolved: true
+ title: '[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
+ text: >-
+ {{ range .Alerts }}
+ *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
+ *Description:* {{ .Annotations.description }}
+ *Details:*
+ {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
+ {{ end }}
+ {{ end }}
+ - name: 'email-and-slack-notifications'
+ email_configs:
+ - to: 'alerts@jamkazam.com'
+ send_resolved: true
+ headers:
+ Subject: '[PRODUCTION] {{ .Status | toUpper }} - {{ range .Alerts }}{{ .Annotations.summary }} {{ end }}'
+        html: '{{ template "email.default.html" . }}
+          <a href="{{ .ExternalURL }}">View in Alertmanager</a>
+          {{ range .Alerts }}{{ if .Annotations.loki_link }}<a href="{{ .Annotations.loki_link }}">View Logs in Loki</a>{{ end }}{{ end }}'
+ slack_configs:
+ - api_url: 'https://hooks.slack.com/services/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'
+ channel: '#monitoring-alerts'
+ send_resolved: true
+ title: '[PRODUCTION] [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
+ text: >-
+ {{ range .Alerts }}
+ *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
+ *Description:* {{ .Annotations.description }}
+ *Details:*
+ {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
+ {{ end }}
+ {{ if .Annotations.loki_link }}
+ *Logs:* <{{ .Annotations.loki_link }}|View in Loki>
+ {{ end }}
+ {{ end }}
+ *Source:* <{{ .ExternalURL }}|Alertmanager>
grafana:
persistence:
enabled: true
diff --git a/k8s/monitoring/values-staging.yaml b/k8s/monitoring/values-staging.yaml
index 2a693c3..70558e5 100644
--- a/k8s/monitoring/values-staging.yaml
+++ b/k8s/monitoring/values-staging.yaml
@@ -148,15 +148,58 @@ kube-prometheus-stack:
repeat_interval: 12h
receiver: 'null'
routes:
- - match:
+ - match:
alertname: WebrtcBeCrashed
- receiver: 'email-alerts'
+ receiver: 'email-and-slack-notifications'
+ - match:
+ alertname: WebrtcBeError
+ receiver: 'email-and-slack-notifications'
receivers:
- name: 'null'
- name: 'email-alerts'
email_configs:
- to: 'alerts@jamkazam.com'
send_resolved: true
+ - name: 'slack-notifications'
+ slack_configs:
+ - api_url: 'https://hooks.slack.com/services/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'
+ channel: '#monitoring-alerts'
+ send_resolved: true
+ title: '[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
+ text: >-
+ {{ range .Alerts }}
+ *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
+ *Description:* {{ .Annotations.description }}
+ *Details:*
+ {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
+ {{ end }}
+ {{ end }}
+ - name: 'email-and-slack-notifications'
+ email_configs:
+ - to: 'alerts@jamkazam.com'
+ send_resolved: true
+ headers:
+ Subject: '[STAGING] {{ .Status | toUpper }} - {{ range .Alerts }}{{ .Annotations.summary }} {{ end }}'
+        html: '{{ template "email.default.html" . }}
+          <a href="{{ .ExternalURL }}">View in Alertmanager</a>
+          {{ range .Alerts }}{{ if .Annotations.loki_link }}<a href="{{ .Annotations.loki_link }}">View Logs in Loki</a>{{ end }}{{ end }}'
+ slack_configs:
+ - api_url: 'https://hooks.slack.com/services/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'
+ channel: '#monitoring-alerts'
+ send_resolved: true
+ title: '[STAGING] [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
+ text: >-
+ {{ range .Alerts }}
+ *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
+ *Description:* {{ .Annotations.description }}
+ *Details:*
+ {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
+ {{ end }}
+ {{ if .Annotations.loki_link }}
+ *Logs:* <{{ .Annotations.loki_link }}|View in Loki>
+ {{ end }}
+ {{ end }}
+ *Source:* <{{ .ExternalURL }}|Alertmanager>
grafana:
persistence:
enabled: true
diff --git a/k8s/webrtc-be/templates/alerts.yaml b/k8s/webrtc-be/templates/alerts.yaml
index aa7e916..8f66a1f 100644
--- a/k8s/webrtc-be/templates/alerts.yaml
+++ b/k8s/webrtc-be/templates/alerts.yaml
@@ -27,3 +27,5 @@ spec:
annotations:
summary: "webrtc-be crashed"
description: "The webrtc-be pod has crashed. Please check the logs."
+ loki_link: >-
+    {{ .Values.grafana.externalUrl }}/explore?orgId=1&left=%5B%22now-1h%22%2C%22now%22%2C%22Loki%22%2C%7B%22expr%22%3A%22%7Bcontainer%3D%5C%22webrtc-be%5C%22%2C%20namespace%3D%5C%22webrtc-be%5C%22%7D%22%7D%5D
diff --git a/scripts/loki-port-forward.sh b/scripts/loki-port-forward.sh
new file mode 100755
index 0000000..4c67e2f
--- /dev/null
+++ b/scripts/loki-port-forward.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Port forward Loki service to localhost:3101
+# Usage: ./scripts/loki-port-forward.sh
+# Keep this running in a separate terminal.
+# Once running, you can use ./scripts/loki-query.sh to inspect logs.
+
+echo "Port forwarding Loki to http://localhost:3101..."
+kubectl -n loki port-forward svc/loki 3101:3100
diff --git a/scripts/loki-query.sh b/scripts/loki-query.sh
new file mode 100755
index 0000000..1f3cb5c
--- /dev/null
+++ b/scripts/loki-query.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Query Loki for recent logs of a specific pod regex
+# Usage: ./scripts/loki-query.sh [pod_regex]
+# Example: ./scripts/loki-query.sh "webrtc-be-.*"
+
+POD_REGEX="${1:-webrtc-be-.*}"
+
+echo "Querying Loki for pod regex: ${POD_REGEX}"
+echo "Checking labels (namespace, cluster, etc)..."
+
+curl -G -s "http://localhost:3101/loki/api/v1/query_range" \
+ --data-urlencode "query={pod=~\"${POD_REGEX}\"}" \
+ --data-urlencode "limit=1" | jq '.data.result[0].stream'