Big change to how we monitor. Let's see.

This commit is contained in:
Seth Call 2026-01-08 06:50:30 -06:00
parent ba13ec0072
commit 57768208ce
2 changed files with 37 additions and 29 deletions

View File

@ -87,39 +87,43 @@ kube-prometheus-stack:
additionalScrapeConfigs:
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: endpoints
- role: pod
relabel_configs:
# 1. Filter: Precisely target the node-exporter service in the monitoring namespace.
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
# 1. Filter: Precisely target the node-exporter pods in the monitoring namespace.
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name]
separator: '/'
# Assuming the pod names start with 'monitoring-prometheus-node-exporter-'
regex: 'monitoring/monitoring-prometheus-node-exporter'
regex: 'monitoring/monitoring-prometheus-node-exporter-.*'
action: keep
# 2. Filter: Ensure we are targeting the standard port (usually 9100)
- source_labels: [__address__]
regex: '.*:9100$'
- source_labels: [__meta_kubernetes_pod_container_port_number]
regex: '9100'
action: keep
# 3. THE FIX: Set the instance label correctly
- source_labels: [__meta_kubernetes_endpoint_node_name]
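# 3. Pull node labels (workload)
# NOTE(review): with role: pod, node labels are only attached when attach_metadata.node is true, and Prometheus documents them as __meta_kubernetes_node_label_<name> - confirm this source label resolves.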
- source_labels: [__meta_kubernetes_pod_node_label_workload]
target_label: workload
action: replace
# 4. Set instance and node labels correctly
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: instance
action: replace
- source_labels: [__address__]
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: node
action: replace
- source_labels: [__meta_kubernetes_pod_ip]
target_label: ip_address
action: replace
# 4. Replicate standard labels for dashboard compatibility
# 5. Replicate standard labels for dashboard compatibility
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
# Ensure standard labels are present for dashboard compatibility
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
- source_labels: [__meta_kubernetes_endpoint_node_name]
target_label: node
alertmanager:
ingress:
enabled: true

View File

@ -3,7 +3,7 @@
rbac:
create: true
cpuThresholdMedia: 65
cpuThresholdMedia: 1
cpuThresholdOther: 80
@ -87,39 +87,43 @@ kube-prometheus-stack:
additionalScrapeConfigs:
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: endpoints
- role: pod
relabel_configs:
# 1. Filter: Precisely target the node-exporter service in the monitoring namespace.
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name]
# 1. Filter: Precisely target the node-exporter pods in the monitoring namespace.
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_name]
separator: '/'
# Assuming the pod names start with 'monitoring-prometheus-node-exporter-'
regex: 'monitoring/monitoring-prometheus-node-exporter'
regex: 'monitoring/monitoring-prometheus-node-exporter-.*'
action: keep
# 2. Filter: Ensure we are targeting the standard port (usually 9100)
- source_labels: [__address__]
regex: '.*:9100$'
- source_labels: [__meta_kubernetes_pod_container_port_number]
regex: '9100'
action: keep
# 3. THE FIX: Set the instance label correctly
- source_labels: [__meta_kubernetes_endpoint_node_name]
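# 3. Pull node labels (workload)
# NOTE(review): with role: pod, node labels are only attached when attach_metadata.node is true, and Prometheus documents them as __meta_kubernetes_node_label_<name> - confirm this source label resolves.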
- source_labels: [__meta_kubernetes_pod_node_label_workload]
target_label: workload
action: replace
# 4. Set instance and node labels correctly
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: instance
action: replace
- source_labels: [__address__]
- source_labels: [__meta_kubernetes_pod_node_name]
target_label: node
action: replace
- source_labels: [__meta_kubernetes_pod_ip]
target_label: ip_address
action: replace
# 4. Replicate standard labels for dashboard compatibility
# 5. Replicate standard labels for dashboard compatibility
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
# Ensure standard labels are present for dashboard compatibility
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
- source_labels: [__meta_kubernetes_endpoint_node_name]
target_label: node
alertmanager:
ingress:
enabled: true