From 16b3969ceb71f121107fdb354195e0751424ba05 Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Tue, 26 May 2026 02:08:35 +0000
Subject: [PATCH] alloy: move resources to alloy.* (chart key bug); 1Gi limit
 fixes IO storm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Alloy Helm chart maps `alloy.resources`, NOT `controller.resources`, onto
the alloy container. The block under `controller:` was silently dropped, so
the container ran with `resources: {}` and inherited the Kyverno LimitRange
`tier-defaults` 256Mi limit — well below Alloy's 400-450Mi steady state.
The cgroup ran at 255.8/256MB with ~50M memory-reclaim events; the
resulting page-cache thrashing drove ~185 MB/s of sdc reads (12.18 TB in
24h), saturating the Proxmox host and rippling out to all VMs and NFS.

Fix:
- Move resources to `alloy.resources` (correct chart key).
- Burstable QoS: request 512Mi, limit 1Gi. Workers are at 97-99%
  memory-request saturation cluster-wide, so a request equal to the
  1Gi limit would block scheduling on node2/node3.
- Bump controller.updateStrategy.maxUnavailable to 50% so a 5-pod DS
  rolling update fits inside the helm timeout.
- Bump helm_release.alloy.timeout to 900s (default 300s was too short
  with occasional runc-stuck-Terminating on k8s-master).

Verified: all 4 alloy pods now show 1Gi/512Mi at the container level;
helm rev=8 deployed; per-pod memory 99-108Mi at steady state (well
under the new limit).

Memory ID 2726.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../monitoring/modules/monitoring/alloy.yaml  | 32 +++++++++++++------
 stacks/monitoring/modules/monitoring/loki.tf  |  5 +--
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/stacks/monitoring/modules/monitoring/alloy.yaml b/stacks/monitoring/modules/monitoring/alloy.yaml
index 900cbb25..59b84fdb 100644
--- a/stacks/monitoring/modules/monitoring/alloy.yaml
+++ b/stacks/monitoring/modules/monitoring/alloy.yaml
@@ -1,4 +1,18 @@
 alloy:
+  # Resource limits for the alloy container itself.
+  # Must be under `alloy.resources` (NOT `controller.resources`) — the chart
+  # only maps THIS key onto the alloy container. Without it, the container gets
+  # `resources: {}` and inherits Kyverno LimitRange `tier-defaults` (256Mi),
+  # which is below Alloy's 400-450Mi steady state and caused page-cache
+  # thrashing → 185 MB/s sdc reads → host IO saturation (2026-05-26).
+  # Burstable QoS (request < limit) — workers are at 97-99% memory-request
+  # saturation, so a 1Gi request would block scheduling on node2/node3.
+  resources:
+    requests:
+      cpu: 50m
+      memory: 512Mi
+    limits:
+      memory: 1Gi
   configMap:
     content: |-
       // Write your Alloy config here:
@@ -183,6 +197,14 @@ alloy:
         readOnly: true
 
 controller:
+  # Bump maxUnavailable above the chart default (1) so a 5-node DS finishes its
+  # rolling update inside the helm_release timeout. Log shipper tolerates the
+  # brief gap.
+  updateStrategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 50%
+
   volumes:
     extra:
       - name: journal-run
@@ -206,13 +228,3 @@ controller:
       operator: "Exists"
       effect: "NoSchedule"
 
-  # Resource limits for DaemonSet pods
-  # Alloy tails logs from all containers on the node via K8s API and batches
-  # them to Loki. Memory scales with number of active log streams (~30-50 per node).
-  # 128Mi was OOMKilled; steady-state usage is ~400-450Mi per pod.
-  resources:
-    requests:
-      cpu: 50m
-      memory: 512Mi
-    limits:
-      memory: 1Gi
diff --git a/stacks/monitoring/modules/monitoring/loki.tf b/stacks/monitoring/modules/monitoring/loki.tf
index 72aa4da2..0b5f3d45 100644
--- a/stacks/monitoring/modules/monitoring/loki.tf
+++ b/stacks/monitoring/modules/monitoring/loki.tf
@@ -28,8 +28,9 @@ resource "helm_release" "alloy" {
   repository = "https://grafana.github.io/helm-charts"
   chart      = "alloy"
 
-  values = [file("${path.module}/alloy.yaml")]
-  atomic = true
+  values  = [file("${path.module}/alloy.yaml")]
+  atomic  = true
+  timeout = 900 # 5-pod DS rolling update + occasional runc-stuck-Terminating on k8s-master needs >300s default
 
   depends_on = [helm_release.loki]
 }