From bb0099b747fe04d8365af97e7cebba461ae807d0 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 5 Jun 2026 16:57:44 +0000 Subject: [PATCH] monitoring(alloy): fix broken pod-log shipping (missing local.file_match) + parse CRI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cluster pod logs were NOT reaching Loki — only node/Pi journals were. Root cause: loki.source.file was fed the /var/log/pods/*<uid>/<container>/*.log glob directly from discovery.relabel, but loki.source.file does NOT expand globs, so it stat()'d the literal `*` path and shipped zero pod logs ("stat failed: no such file" for every pod). Per Grafana Alloy docs, a local.file_match component must expand the glob into concrete file targets first. Add it. Also add stage.cri {} so Loki stores clean messages + real timestamps instead of raw containerd CRI-prefixed lines. Fixes cluster-wide log observability (regression vs the working 2026-05-26 state). Ship-all-then-measure per the agreed plan; Alloy mem limits stay as the IO-storm safeguard. Co-Authored-By: Claude Opus 4.8 --- .../monitoring/modules/monitoring/alloy.yaml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/stacks/monitoring/modules/monitoring/alloy.yaml b/stacks/monitoring/modules/monitoring/alloy.yaml index 59b84fdb..a938a29a 100644 --- a/stacks/monitoring/modules/monitoring/alloy.yaml +++ b/stacks/monitoring/modules/monitoring/alloy.yaml @@ -95,18 +95,33 @@ alloy: } } + // local.file_match expands the /var/log/pods/*<uid>/<container>/*.log globs + // that discovery.relabel.pod_logs writes into __path__ (doublestar) into + // concrete file targets. loki.source.file does NOT expand globs itself, so + // feeding it the glob directly makes it stat() the literal `*` path and ship + // ZERO pod logs (regression found 2026-06-05 — this component was missing; + // only node/Pi journals were reaching Loki). See Grafana Alloy docs + // "local.file_match > Send Kubernetes Pod logs to Loki". 
+ local.file_match "pod_logs" { + path_targets = discovery.relabel.pod_logs.output + } + // loki.source.file tails pod logs from /var/log/pods/* on the host filesystem. // Previously used loki.source.kubernetes (apiserver streaming) which drove // kube-apiserver `CONNECT pods/log` to ~13 req/s + ~2200 sec/s of streams. - // discovery.relabel.pod_logs already sets __path__ to the kubelet log path. loki.source.file "pod_logs" { - targets = discovery.relabel.pod_logs.output + targets = local.file_match.pod_logs.targets forward_to = [loki.process.pod_logs.receiver] } // loki.process receives log entries from other Loki components, applies one or more processing stages, // and forwards the results to the list of receivers in the component's arguments. loki.process "pod_logs" { + // Parse the containerd CRI wrapper ("<timestamp> <stream> <flags> <message>") so Loki + // stores the clean message + the real timestamp instead of the raw prefixed + // line. All cluster nodes run containerd, so a bare stage.cri is correct. + stage.cri { } + stage.static_labels { values = { cluster = "default",