# Helm chart values for Prometheus Operator with HTTPS and basic auth
# Explicitly enable RBAC resource creation
rbac:
  create: true

# CPU thresholds, one for "media" and one for all "other" workloads.
# NOTE(review): presumably percentages read by alert-rule templates that are
# not part of this file; 1 versus 80 is a very large gap - confirm that the
# media value is intentional and not a leftover from testing.
cpuThresholdMedia: 1
cpuThresholdOther: 80
# Overrides handed to the kube-prometheus-stack subchart.
# NOTE(review): indentation was reconstructed from the chart's values schema.
# If the later `prometheus:` and `alertmanager:` sections of this file are also
# nested under this key, they are duplicate mapping keys; most parsers keep
# only the last one, which would silently drop the nodeSelector entries
# below - confirm and merge the sections if so.
kube-prometheus-stack:
  # Schedule the monitoring components onto nodes labelled workload=infra.
  prometheus:
    prometheusSpec:
      nodeSelector:
        workload: infra
  grafana:
    nodeSelector:
      workload: infra
  alertmanager:
    alertmanagerSpec:
      nodeSelector:
        workload: infra

  # Do not install the CRDs from this chart.
  crds:
    enabled: false

  # Disable the default node-exporter ServiceMonitor so node metrics are not
  # scraped twice (a manual `node-exporter` scrape job is configured under
  # prometheusSpec.additionalScrapeConfigs).
  prometheus-node-exporter:
    # This is the switch the prometheus-node-exporter chart actually reads.
    prometheus:
      monitor:
        enabled: false
    # Legacy key kept for backward compatibility.
    # NOTE(review): not part of the prometheus-node-exporter values schema as
    # far as the public chart shows - remove once confirmed unused.
    serviceMonitor:
      enabled: false
  # NOTE(review): kube-prometheus-stack's `nodeExporter.serviceMonitor` block
  # has no `enabled` field in the public chart; kept unchanged for
  # compatibility - confirm against the pinned chart version.
  nodeExporter:
    serviceMonitor:
      enabled: false
# Prometheus server: public ingress and PrometheusSpec overrides.
# NOTE(review): indentation was reconstructed from the kube-prometheus-stack
# values schema - verify nesting against the deployed file.
prometheus:
  # Served at https://monitoring.video.jamkazam.com/prometheus through the
  # nginx ingress class, with a cert-manager issued certificate and HTTP basic
  # auth backed by the `monitoring-basic-auth` secret.
  ingress:
    enabled: true
    pathType: Prefix
    annotations:
      kubernetes.io/ingress.class: nginx
      # nginx.ingress.kubernetes.io/rewrite-target: /$2
      cert-manager.io/cluster-issuer: letsencrypt-nginx-production
      nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
      nginx.ingress.kubernetes.io/auth-type: basic
      nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
      nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
    hosts:
      - monitoring.video.jamkazam.com
    paths:
      - /prometheus
    tls:
      - secretName: monitoring
        hosts:
          - monitoring.video.jamkazam.com

  prometheusSpec:
    # Keep data for 60 days or 20GB, whichever limit is reached first; the
    # size cap stays below the 30Gi volume requested in storageSpec.
    retention: 60d
    retentionSize: 20GB
    # routePrefix and externalUrl must agree with the ingress path above.
    routePrefix: /prometheus
    externalUrl: https://monitoring.video.jamkazam.com/prometheus
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: linode-block-storage-retain
          resources:
            requests:
              storage: 30Gi

    # Ignore the chart's default node-exporter ServiceMonitor. Node metrics
    # come from the manual `node-exporter` job in additionalScrapeConfigs, so
    # selecting the ServiceMonitor as well would produce duplicate series.
    serviceMonitorSelector:
      matchExpressions:
        # Matches every ServiceMonitor except those labelled
        # app.kubernetes.io/name=prometheus-node-exporter.
        - key: app.kubernetes.io/name
          operator: NotIn
          values:
            - prometheus-node-exporter
    # Only discover ServiceMonitors in these namespaces.
    serviceMonitorNamespaceSelector:
      matchExpressions:
        - key: kubernetes.io/metadata.name
          operator: In
          values:
            - monitoring  # Prometheus' own namespace
            - webrtc-be  # application namespace

    # Discover PrometheusRules in every namespace (avoids depending on
    # namespace labels) ...
    ruleNamespaceSelector: {}
    # ... and accept every rule found there (an empty expression list places
    # no restriction).
    ruleSelector:
      matchExpressions: []

    # Manual node-exporter scrape job (replaces the default ServiceMonitor).
    additionalScrapeConfigs:
      - job_name: 'node-exporter'
        kubernetes_sd_configs:
          - role: pod
            # Only watch the namespace the keep rule below accepts anyway,
            # instead of discovering every pod in the cluster.
            namespaces:
              names:
                - monitoring
            # Required for the __meta_kubernetes_node_label_* labels used in
            # step 3. Needs Prometheus >= 2.35 and permission to get Nodes.
            attach_metadata:
              node: true
        relabel_configs:
          # 1. Keep only the node-exporter pods in the monitoring namespace.
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name]
            separator: '/'
            regex: 'monitoring/monitoring-prometheus-node-exporter-.*'
            action: keep
          # 2. Keep only container port 9100.
          - source_labels: [__meta_kubernetes_pod_container_port_number]
            regex: '9100'
            action: keep
          # 3. Copy the node's `workload` label onto the target. Node labels
          #    are exposed as __meta_kubernetes_node_label_<name> (with
          #    attach_metadata.node); pod discovery emits no
          #    __meta_kubernetes_pod_node_label_* labels, so the previous
          #    source label always produced an empty `workload`.
          - source_labels: [__meta_kubernetes_node_label_workload]
            target_label: workload
            action: replace
          # 4. Identify the target by node name and record the pod IP.
          - source_labels: [__meta_kubernetes_pod_node_name]
            target_label: instance
            action: replace
          - source_labels: [__meta_kubernetes_pod_node_name]
            target_label: node
            action: replace
          - source_labels: [__meta_kubernetes_pod_ip]
            target_label: ip_address
            action: replace
          # 5. Copy all pod labels, then set namespace and pod, for dashboard
          #    compatibility.
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: namespace
          - source_labels: [__meta_kubernetes_pod_name]
            target_label: pod
# Alertmanager: public ingress, spec overrides and notification routing.
# NOTE(review): indentation was reconstructed from the kube-prometheus-stack
# values schema - verify nesting against the deployed file.
#
# SECURITY: the SMTP password and the Slack webhook URL used to be committed
# here in plain text. They are now read from files mounted out of the
# Kubernetes Secret `alertmanager-notification-secrets`, which must exist in
# the Alertmanager namespace with the keys `smtp_auth_password` and
# `slack_api_url`. The previously committed values must be treated as leaked:
# rotate the SES SMTP credentials (then update smtp_auth_username below) and
# regenerate the Slack webhook.
alertmanager:
  # Served at https://monitoring.video.jamkazam.com/alertmanager through the
  # nginx ingress class, with a cert-manager issued certificate and HTTP basic
  # auth backed by the `monitoring-basic-auth` secret.
  ingress:
    enabled: true
    pathType: Prefix
    annotations:
      kubernetes.io/ingress.class: nginx
      # nginx.ingress.kubernetes.io/rewrite-target: /$2
      cert-manager.io/cluster-issuer: letsencrypt-nginx-production
      nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
      nginx.ingress.kubernetes.io/auth-type: basic
      nginx.ingress.kubernetes.io/auth-secret: monitoring-basic-auth
      nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
    hosts:
      - monitoring.video.jamkazam.com
    paths:
      - /alertmanager
    tls:
      - secretName: monitoring
        hosts:
          - monitoring.video.jamkazam.com

  alertmanagerSpec:
    # routePrefix and externalUrl must agree with the ingress path above.
    routePrefix: /alertmanager
    externalUrl: https://monitoring.video.jamkazam.com/alertmanager
    # Secrets listed here are mounted by the operator at
    # /etc/alertmanager/secrets/<secret-name>/ inside the Alertmanager pod.
    secrets:
      - alertmanager-notification-secrets
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: linode-block-storage-retain
          resources:
            requests:
              storage: 30Gi

  config:
    global:
      resolve_timeout: 5m
      smtp_smarthost: 'email-smtp.us-east-1.amazonaws.com:587'
      smtp_from: 'support@jamkazam.com'
      smtp_auth_username: 'AKIA2SXEHOQFM326T4WJ'
      # Password is read from the mounted Secret (Alertmanager >= 0.25).
      smtp_auth_password_file: /etc/alertmanager/secrets/alertmanager-notification-secrets/smtp_auth_password
      smtp_require_tls: true
      # Default webhook for every slack_configs entry that sets no api_url of
      # its own.
      slack_api_url_file: /etc/alertmanager/secrets/alertmanager-notification-secrets/slack_api_url

    route:
      group_by: ['job']
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 12h
      # Alerts matching none of the routes below go to the 'null' receiver,
      # which has no notification configs and therefore sends nothing.
      receiver: 'null'
      routes:
        - match:
            alertname: NodeHighCPU
          receiver: 'slack-notifications'
        - match:
            alertname: WebrtcBeCrashed
          receiver: 'email-and-slack-notifications'
        - match:
            alertname: WebrtcBeDown
          receiver: 'email-and-slack-notifications'
        - match:
            alertname: WebrtcBeError
          receiver: 'email-and-slack-notifications'
        - match:
            alertname: PodOOMKilled
          receiver: 'slack-notifications-oom'
        - match:
            alertname: PodCrashLoopBackOff
          receiver: 'slack-notifications'

    receivers:
      - name: 'null'
      # Not referenced by any route above.
      - name: 'email-alerts'
        email_configs:
          - to: 'alerts@jamkazam.com'
            send_resolved: true
      # Slack receivers take their webhook from global.slack_api_url_file.
      # NOTE(review): inside the folded `text` scalars the deeper indentation
      # of the inner lines preserves their line breaks; the original
      # indentation was not recoverable, so the conventional layout is used -
      # verify the rendered Slack message.
      - name: 'slack-notifications'
        slack_configs:
          - channel: '#video-cluster-prd-alerts'
            send_resolved: true
            title: '[PRODUCTION] [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Event Notification'
            text: >-
              {{ range .Alerts }}
                *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
                *Description:* {{ .Annotations.description }}
                *Details:*
                {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
                {{ end }}
              {{ end }}
      # OOM kills are reported once; no "resolved" message is sent.
      - name: 'slack-notifications-oom'
        slack_configs:
          - channel: '#video-cluster-prd-alerts'
            send_resolved: false
            title: '[PRODUCTION] [OOM KILLED] Monitoring Event Notification'
            text: >-
              {{ range .Alerts }}
                *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}`
                *Description:* {{ .Annotations.description }}
                *Details:*
                {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
                {{ end }}
              {{ end }}
- name: 'email-and-slack-notifications'
email_configs:
- to: 'alerts@jamkazam.com'
send_resolved: true
headers:
Subject: '[PRODUCTION] {{ .Status | toUpper }} - {{ range .Alerts }}{{ .Annotations.summary }} {{ end }}'
html: '{{ template "email.default.html" . }}