infra/stacks/monitoring/modules/monitoring/alloy.yaml
Viktor Barzin aba061cf2e alloy: switch pod log shipping from apiserver to file-tail
Replaced 'loki.source.kubernetes' with 'loki.source.file' in alloy DS
config. discovery.relabel.pod_logs already sets __path__ to the kubelet
log path (/var/log/pods/*<uid>/<container>/*.log) and varlog host-mount
was already present, so this is a one-line swap.

Why: apiserver was burning ~700m sustained on 'CONNECT pods/log' streams
(13 req/s, ~2200 sec/s of long-lived TCP connections). Streaming pod
logs through the apiserver instead of tailing kubelet's log files was
the dominant residual cost after the recent Loki/Alloy onboarding.

Measured before/after:
- Alloy DS: ~620m total (5 x ~125m) -> ~92m total (5 x ~18m)
- kube-apiserver: peak 1959m midnight burst, settled 632m

(Stuck-pod recovery: alloy-7zg7t on k8s-master needed --force delete
during rollout — FailedKillPod 'unable to signal init: permission denied'
on runc, transient runtime issue, unrelated to this change.)
2026-05-21 08:27:34 +00:00

218 lines
7.4 KiB
YAML

alloy:
configMap:
content: |-
// Write your Alloy config here:
logging {
level = "info"
format = "logfmt"
}
loki.write "default" {
endpoint {
url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
}
}
// discovery.kubernetes allows you to find scrape targets from Kubernetes resources.
// It watches cluster state and ensures targets are continually synced with what is currently running in your cluster.
discovery.kubernetes "pod" {
role = "pod"
}
// discovery.relabel rewrites the label set of the input targets by applying one or more relabeling rules.
// If no rules are defined, then the input targets are exported as-is.
discovery.relabel "pod_logs" {
targets = discovery.kubernetes.pod.targets
// Label creation - "namespace" field from "__meta_kubernetes_namespace"
rule {
source_labels = ["__meta_kubernetes_namespace"]
action = "replace"
target_label = "namespace"
}
// Label creation - "pod" field from "__meta_kubernetes_pod_name"
rule {
source_labels = ["__meta_kubernetes_pod_name"]
action = "replace"
target_label = "pod"
}
// Label creation - "container" field from "__meta_kubernetes_pod_container_name"
rule {
source_labels = ["__meta_kubernetes_pod_container_name"]
action = "replace"
target_label = "container"
}
// Label creation - "app" field from "__meta_kubernetes_pod_label_app_kubernetes_io_name"
rule {
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
action = "replace"
target_label = "app"
}
// Label creation - "job" field from "__meta_kubernetes_namespace" and "__meta_kubernetes_pod_container_name"
// Concatenate values __meta_kubernetes_namespace/__meta_kubernetes_pod_container_name
rule {
source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
action = "replace"
target_label = "job"
separator = "/"
replacement = "$1"
}
// Label creation - "container" field from "__meta_kubernetes_pod_uid" and "__meta_kubernetes_pod_container_name"
// Concatenate values __meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name.log
rule {
source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
action = "replace"
target_label = "__path__"
separator = "/"
replacement = "/var/log/pods/*$1/*.log"
}
// Label creation - "container_runtime" field from "__meta_kubernetes_pod_container_id"
rule {
source_labels = ["__meta_kubernetes_pod_container_id"]
action = "replace"
target_label = "container_runtime"
regex = "^(\\S+):\\/\\/.+$"
replacement = "$1"
}
}
// loki.source.file tails pod logs from /var/log/pods/* on the host filesystem.
// Previously used loki.source.kubernetes (apiserver streaming) which drove
// kube-apiserver `CONNECT pods/log` to ~13 req/s + ~2200 sec/s of streams.
// discovery.relabel.pod_logs already sets __path__ to the kubelet log path.
loki.source.file "pod_logs" {
targets = discovery.relabel.pod_logs.output
forward_to = [loki.process.pod_logs.receiver]
}
// loki.process receives log entries from other Loki components, applies one or more processing stages,
// and forwards the results to the list of receivers in the component's arguments.
loki.process "pod_logs" {
stage.static_labels {
values = {
cluster = "default",
}
}
forward_to = [loki.write.default.receiver]
}
// Node-level journal log collection for kernel panics, OOMs, hung tasks, etc.
// Ships system logs off-node so they survive hard resets.
loki.source.journal "node_journal" {
forward_to = [loki.process.journal.receiver]
relabel_rules = loki.relabel.journal.rules
labels = {
job = "node-journal",
}
max_age = "12h"
}
loki.relabel "journal" {
forward_to = []
rule {
source_labels = ["__journal__hostname"]
target_label = "node"
}
rule {
source_labels = ["__journal__systemd_unit"]
target_label = "unit"
}
rule {
source_labels = ["__journal_priority_keyword"]
target_label = "level"
}
rule {
source_labels = ["__journal__transport"]
target_label = "transport"
}
}
// Forward warning+ journal entries (priority 0-4: emerg, alert, crit, err, warning)
// Also forwards kernel transport entries regardless of priority for OOM/panic detection.
loki.process "journal" {
stage.static_labels {
values = {
cluster = "default",
}
}
// Drop info/debug/notice entries that aren't from the kernel transport
stage.match {
selector = "{job=\"node-journal\", level=~\"info|notice|debug\", transport!=\"kernel\"}"
action = "drop"
}
forward_to = [loki.write.default.receiver]
}
// Kubernetes audit log collection from /var/log/kubernetes/audit.log
// Requires alloy.mounts.varlog=true to mount /var/log from the host
local.file_match "audit_logs" {
path_targets = [{
__path__ = "/var/log/kubernetes/audit.log",
job = "kubernetes-audit",
node = env("HOSTNAME"),
}]
}
loki.source.file "audit_logs" {
targets = local.file_match.audit_logs.targets
forward_to = [loki.write.default.receiver]
}
# Mount /var/log from the host for file-based log collection (audit logs)
mounts:
varlog: true
# Mount journal directories for loki.source.journal
extra:
- name: journal-run
mountPath: /run/log/journal
readOnly: true
- name: journal-var
mountPath: /var/log/journal
readOnly: true
- name: machine-id
mountPath: /etc/machine-id
readOnly: true
controller:
volumes:
extra:
- name: journal-run
hostPath:
path: /run/log/journal
type: DirectoryOrCreate
- name: journal-var
hostPath:
path: /var/log/journal
type: DirectoryOrCreate
- name: machine-id
hostPath:
path: /etc/machine-id
type: File
# Schedule on control-plane node too so we can tail /var/log/kubernetes/audit.log
# from kube-apiserver. Without this, K8s audit log shipping (wave 1 K2-K9 alert
# rules) has no source. control-plane has the standard NoSchedule taint.
tolerations:
- key: "node-role.kubernetes.io/control-plane"
operator: "Exists"
effect: "NoSchedule"
# Resource limits for DaemonSet pods
# Alloy tails logs from all containers on the node via K8s API and batches
# them to Loki. Memory scales with number of active log streams (~30-50 per node).
# 128Mi was OOMKilled; steady-state usage is ~400-450Mi per pod.
resources:
requests:
cpu: 50m
memory: 512Mi
limits:
memory: 1Gi