Big change to how we monitor. Let's see.

This commit is contained in:
Seth Call 2026-01-08 06:50:30 -06:00
parent ba13ec0072
commit 57768208ce
2 changed files with 37 additions and 29 deletions

View File

@ -87,39 +87,43 @@ kube-prometheus-stack:
additionalScrapeConfigs: additionalScrapeConfigs:
- job_name: 'node-exporter' - job_name: 'node-exporter'
kubernetes_sd_configs: kubernetes_sd_configs:
- role: endpoints - role: pod
relabel_configs: relabel_configs:
# 1. Filter: Precisely target the node-exporter service in the monitoring namespace. # 1. Filter: Precisely target the node-exporter pods in the monitoring namespace.
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name] - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name]
separator: '/' separator: '/'
# Assuming the service name is 'monitoring-prometheus-node-exporter' regex: 'monitoring/monitoring-prometheus-node-exporter-.*'
regex: 'monitoring/monitoring-prometheus-node-exporter'
action: keep action: keep
# 2. Filter: Ensure we are targeting the standard port (usually 9100) # 2. Filter: Ensure we are targeting the standard port (usually 9100)
- source_labels: [__address__] - source_labels: [__meta_kubernetes_pod_container_port_number]
regex: '.*:9100$' regex: '9100'
action: keep action: keep
# 3. THE FIX: Set the instance label correctly # 3. Pull node labels (workload)
- source_labels: [__meta_kubernetes_endpoint_node_name] - source_labels: [__meta_kubernetes_pod_node_label_workload]
target_label: workload
action: replace
# 4. Set instance and node labels correctly
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: instance target_label: instance
action: replace action: replace
- source_labels: [__address__] - source_labels: [__meta_kubernetes_pod_node_name]
target_label: node
action: replace
- source_labels: [__meta_kubernetes_pod_ip]
target_label: ip_address target_label: ip_address
action: replace action: replace
# 4. Replicate standard labels for dashboard compatibility # 5. Replicate standard labels for dashboard compatibility
- action: labelmap - action: labelmap
regex: __meta_kubernetes_pod_label_(.+) regex: __meta_kubernetes_pod_label_(.+)
# Ensure standard labels are present for dashboard compatibility
- source_labels: [__meta_kubernetes_namespace] - source_labels: [__meta_kubernetes_namespace]
target_label: namespace target_label: namespace
- source_labels: [__meta_kubernetes_pod_name] - source_labels: [__meta_kubernetes_pod_name]
target_label: pod target_label: pod
- source_labels: [__meta_kubernetes_endpoint_node_name]
target_label: node
alertmanager: alertmanager:
ingress: ingress:
enabled: true enabled: true

View File

@ -3,7 +3,7 @@
rbac: rbac:
create: true create: true
cpuThresholdMedia: 65 cpuThresholdMedia: 1
cpuThresholdOther: 80 cpuThresholdOther: 80
@ -87,39 +87,43 @@ kube-prometheus-stack:
additionalScrapeConfigs: additionalScrapeConfigs:
- job_name: 'node-exporter' - job_name: 'node-exporter'
kubernetes_sd_configs: kubernetes_sd_configs:
- role: endpoints - role: pod
relabel_configs: relabel_configs:
# 1. Filter: Precisely target the node-exporter service in the monitoring namespace. # 1. Filter: Precisely target the node-exporter pods in the monitoring namespace.
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name] - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name]
separator: '/' separator: '/'
# Assuming the service name is 'monitoring-prometheus-node-exporter' regex: 'monitoring/monitoring-prometheus-node-exporter-.*'
regex: 'monitoring/monitoring-prometheus-node-exporter'
action: keep action: keep
# 2. Filter: Ensure we are targeting the standard port (usually 9100) # 2. Filter: Ensure we are targeting the standard port (usually 9100)
- source_labels: [__address__] - source_labels: [__meta_kubernetes_pod_container_port_number]
regex: '.*:9100$' regex: '9100'
action: keep action: keep
# 3. THE FIX: Set the instance label correctly # 3. Pull node labels (workload)
- source_labels: [__meta_kubernetes_endpoint_node_name] - source_labels: [__meta_kubernetes_pod_node_label_workload]
target_label: workload
action: replace
# 4. Set instance and node labels correctly
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: instance target_label: instance
action: replace action: replace
- source_labels: [__address__] - source_labels: [__meta_kubernetes_pod_node_name]
target_label: node
action: replace
- source_labels: [__meta_kubernetes_pod_ip]
target_label: ip_address target_label: ip_address
action: replace action: replace
# 4. Replicate standard labels for dashboard compatibility # 5. Replicate standard labels for dashboard compatibility
- action: labelmap - action: labelmap
regex: __meta_kubernetes_pod_label_(.+) regex: __meta_kubernetes_pod_label_(.+)
# Ensure standard labels are present for dashboard compatibility
- source_labels: [__meta_kubernetes_namespace] - source_labels: [__meta_kubernetes_namespace]
target_label: namespace target_label: namespace
- source_labels: [__meta_kubernetes_pod_name] - source_labels: [__meta_kubernetes_pod_name]
target_label: pod target_label: pod
- source_labels: [__meta_kubernetes_endpoint_node_name]
target_label: node
alertmanager: alertmanager:
ingress: ingress:
enabled: true enabled: true