Attempt to deploy Loki and Promtail

This commit is contained in:
Seth Call 2025-12-06 17:42:59 -06:00
parent caf2078b64
commit 507ddbab2e
12 changed files with 224 additions and 0 deletions

24
k8s/README.md Normal file
View File

@ -0,0 +1,24 @@
# Kubernetes Configuration
This directory contains Kubernetes manifests and configuration for the video infrastructure.
## Managing CRDs
The file `all-crds.yaml` contains all Custom Resource Definitions (CRDs) required by the monitoring stack (Prometheus Operator).
### When to update CRDs
You should regenerate `all-crds.yaml` by running `scripts/update-crds.sh` when:
1. **Upgrading the `kube-prometheus-stack` Helm chart**: If you bump the chart version in `k8s/monitoring/Chart.yaml` and update the dependencies, you must also update the CRDs to match the new version.
2. **Missing CRD fields**: If you encounter errors like `field not declared in schema` during ArgoCD syncs, it likely means the installed CRDs are outdated.
### How to update
Run the update script from the repository root:
```bash
./scripts/update-crds.sh
```
This script extracts the CRDs from the local `kube-prometheus-stack` chart package and concatenates them into `k8s/all-crds.yaml`.

View File

@ -0,0 +1,28 @@
# Argo CD Application that deploys the Helm chart stored at k8s/loki in the
# video-iac repository into the `loki` namespace of the in-cluster API server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: loki
spec:
  destination:
    namespace: loki
    server: 'https://kubernetes.default.svc'
  source:
    helm:
      valueFiles:
        - values.yaml
    path: k8s/loki
    repoURL: 'git@bitbucket.org:jamkazam/video-iac.git'
    # The branch name is emitted through Helm's `quote` so the rendered value
    # is always a YAML string, even for branch names that look like numbers,
    # booleans or null (or when the value is empty).
    targetRevision: {{ .Values.gitBranch | quote }}
  project: default
  syncPolicy:
    syncOptions:
      # Let Argo CD create the destination namespace if it is missing.
      - CreateNamespace=true
      - ServerSideApply=true
    automated:
      # Remove cluster resources that are no longer present in Git.
      prune: true
    retry:
      # Up to 5 sync retries; delay starts at 5s, doubles, capped at 3m.
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m

View File

@ -0,0 +1,28 @@
# Argo CD Application that deploys the Helm chart stored at k8s/promtail in
# the video-iac repository into the `loki` namespace of the in-cluster API
# server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: promtail
spec:
  destination:
    namespace: loki
    server: 'https://kubernetes.default.svc'
  source:
    helm:
      valueFiles:
        - values.yaml
    path: k8s/promtail
    repoURL: 'git@bitbucket.org:jamkazam/video-iac.git'
    # The branch name is emitted through Helm's `quote` so the rendered value
    # is always a YAML string, even for branch names that look like numbers,
    # booleans or null (or when the value is empty).
    targetRevision: {{ .Values.gitBranch | quote }}
  project: default
  syncPolicy:
    syncOptions:
      # Let Argo CD create the destination namespace if it is missing.
      - CreateNamespace=true
      - ServerSideApply=true
    automated:
      # Remove cluster resources that are no longer present in Git.
      prune: true
    retry:
      # Up to 5 sync retries; delay starts at 5s, doubles, capped at 3m.
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m

6
k8s/loki/Chart.yaml Normal file
View File

@ -0,0 +1,6 @@
# Helm chart metadata for the `loki` chart.
apiVersion: v2
type: application
name: loki
description: A Helm chart for Loki
# Version of this chart.
version: 0.1.0
# Quoted so the value stays a string instead of being read as a float.
appVersion: '1.0'

Binary file not shown.

51
k8s/loki/values.yaml Normal file
View File

@ -0,0 +1,51 @@
# Values for the loki chart: an inline Loki configuration (passed as one
# multi-line string) followed by replica and persistence settings.
# NOTE(review): the config stores its index and chunks under /data/loki/...;
# confirm the chart mounts the persistent volume enabled under singleBinary
# at /data, otherwise that data will not survive a pod restart.
# NOTE(review): 672h is 28 days; the same value is used for the look-back
# limit and for the table-manager retention period.
loki:
config: |
auth_enabled: false
server:
http_listen_port: 3100
ingester:
lifecycler:
address: 127.0.0.1
ring:
kvstore:
store: inmemory
replication_factor: 1
schema_config:
configs:
- from: 2020-10-24
store: boltdb-shipper
object_store: filesystem
schema: v11
index:
prefix: index_
period: 24h
storage_config:
boltdb_shipper:
active_index_directory: /data/loki/index
shared_store: filesystem
filesystem:
directory: /data/loki/chunks
chunk_store_config:
max_look_back_period: 672h
table_manager:
retention_deletes_enabled: true
retention_period: 672h
singleBinary:
# One replica backed by a 20Gi volume from the
# linode-block-storage-retain storage class.
replicas: 1
persistence:
enabled: true
size: 20Gi
storageClass: "linode-block-storage-retain"
# The read, write and backend components are scaled to zero and the gateway
# is turned off; presumably only the single-binary instance above is meant
# to run — verify against the chart in use.
read:
replicas: 0
write:
replicas: 0
backend:
replicas: 0
gateway:
enabled: false

View File

@ -131,6 +131,30 @@ kube-prometheus-stack:
requests:
storage: 30Gi
# Alert notification configuration: SMTP delivery settings, the routing tree
# and the receivers that routes can point at.
config:
global:
resolve_timeout: 5m
smtp_smarthost: 'email-smtp.us-east-1.amazonaws.com:587'
smtp_from: 'support@jamkazam.com'
smtp_auth_username: 'ses-smtp-user.20251206-174105'
# SECURITY NOTE(review): this SMTP password is committed in plain text.
# Rotate the credential and supply it from a Kubernetes Secret instead
# (for example through smtp_auth_password_file) — TODO.
smtp_auth_password: 'BEeyqbF7U/2BvCxXVU672geq1c9fXKisAw+gM5J+vaZi'
smtp_require_tls: true
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
# Default receiver; 'null' is declared below with no notification settings.
receiver: 'null'
routes:
# Only alerts named WebrtcBeCrashed are routed to the email receiver.
- match:
alertname: WebrtcBeCrashed
receiver: 'email-alerts'
receivers:
- name: 'null'
- name: 'email-alerts'
email_configs:
# send_resolved: true also emails when the alert clears.
- to: 'alerts@jamkazam.com'
send_resolved: true
grafana:
persistence:
enabled: true

View File

@ -131,6 +131,30 @@ kube-prometheus-stack:
requests:
storage: 30Gi
# Alert notification configuration: SMTP delivery settings, the routing tree
# and the receivers that routes can point at.
config:
global:
resolve_timeout: 5m
smtp_smarthost: 'email-smtp.us-east-1.amazonaws.com:587'
smtp_from: 'support@jamkazam.com'
smtp_auth_username: 'ses-smtp-user.20251206-174105'
# SECURITY NOTE(review): this SMTP password is committed in plain text.
# Rotate the credential and supply it from a Kubernetes Secret instead
# (for example through smtp_auth_password_file) — TODO.
smtp_auth_password: 'BEeyqbF7U/2BvCxXVU672geq1c9fXKisAw+gM5J+vaZi'
smtp_require_tls: true
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
# Default receiver; 'null' is declared below with no notification settings.
receiver: 'null'
routes:
# Only alerts named WebrtcBeCrashed are routed to the email receiver.
- match:
alertname: WebrtcBeCrashed
receiver: 'email-alerts'
receivers:
- name: 'null'
- name: 'email-alerts'
email_configs:
# send_resolved: true also emails when the alert clears.
- to: 'alerts@jamkazam.com'
send_resolved: true
grafana:
persistence:
enabled: true

6
k8s/promtail/Chart.yaml Normal file
View File

@ -0,0 +1,6 @@
# Helm chart metadata for the `promtail` chart.
apiVersion: v2
type: application
name: promtail
description: A Helm chart for Promtail
# Version of this chart.
version: 0.1.0
# Quoted so the value stays a string instead of being read as a float.
appVersion: '1.0'

Binary file not shown.

4
k8s/promtail/values.yaml Normal file
View File

@ -0,0 +1,4 @@
# Values for the promtail chart: one push client pointing at the `loki`
# service in the `loki` namespace on port 3100.
promtail:
  config:
    clients:
      - url: 'http://loki.loki.svc:3100/loki/api/v1/push'

View File

@ -0,0 +1,29 @@
# PrometheusRule declaring two critical alerts for the webrtc-be container
# in the webrtc-be namespace: one based on log content, one on restarts.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: webrtc-be-log-alerts
labels:
app: webrtc-be
spec:
groups:
- name: webrtc-be.alerts
rules:
# Fires when the number of log lines containing "error" over the last 5m
# stays above zero for 1m.
# NOTE(review): this expression uses a log-stream selector and a line filter
# (LogQL), not PromQL. Confirm it is evaluated by a Loki ruler; Prometheus
# itself is not expected to accept it, and the rule below is PromQL.
- alert: WebrtcBeError
expr: 'sum(count_over_time({container="webrtc-be", namespace="webrtc-be"} |= "error" [5m])) > 0'
for: 1m
labels:
severity: critical
annotations:
summary: "Errors found in webrtc-be logs"
description: "The webrtc-be container is logging errors. Please check the logs."
# Grafana Explore link pre-filled with the same stream selector; the base URL
# is taken from the chart value grafana.externalUrl at render time.
loki_link: >-
{{ .Values.grafana.externalUrl }}/explore?orgId=1&left=["now-1h","now","Loki",{"expr":"{container=\"webrtc-be\", namespace=\"webrtc-be\"}"}]
# Fires when the container restart counter has increased within the last 5m
# and the condition holds for 1m. The alert name matches the WebrtcBeCrashed
# route used for email notifications.
- alert: WebrtcBeCrashed
expr: increase(kube_pod_container_status_restarts_total{container="webrtc-be", namespace="webrtc-be"}[5m]) > 0
for: 1m
labels:
severity: critical
annotations:
summary: "webrtc-be crashed"
description: "The webrtc-be pod has crashed. Please check the logs."