From 16b3969ceb71f121107fdb354195e0751424ba05 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 02:08:35 +0000 Subject: [PATCH] alloy: move resources to alloy.* (chart key bug); 1Gi limit fixes IO storm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Alloy Helm chart maps `alloy.resources`, NOT `controller.resources`, onto the alloy container. The block under `controller:` was silently dropped, so the container ran with `resources: {}` and inherited the Kyverno LimitRange `tier-defaults` 256Mi — well below Alloy's 400-450Mi steady state. The cgroup ran at 255.8/256MB with ~50M memory-reclaim events, page-cache thrashing drove ~185 MB/s sdc reads (12.18 TB in 24h), saturating the Proxmox host and rippling out to all VMs + NFS. Fix: - Move resources to `alloy.resources` (correct chart key). - Burstable QoS: request 512Mi, limit 1Gi. Workers are at 97-99% memory-request saturation cluster-wide; a 1Gi request blocks scheduling on node2/node3. - Bump controller.updateStrategy.maxUnavailable to 50% so a 5-pod DS rolling update fits inside the helm timeout. - Bump helm_release.alloy.timeout to 900s (default 300s was too short with occasional runc-stuck-Terminating on k8s-master). Verified: all 4 alloy pods now show 1Gi/512Mi at the container level; helm rev=8 deployed; per-pod memory 99-108Mi at steady state (well under the new limit). Memory ID 2726. Co-Authored-By: Claude Opus 4.7 --- .../monitoring/modules/monitoring/alloy.yaml | 32 +++++++++++++------ stacks/monitoring/modules/monitoring/loki.tf | 5 +-- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/stacks/monitoring/modules/monitoring/alloy.yaml b/stacks/monitoring/modules/monitoring/alloy.yaml index 900cbb25..59b84fdb 100644 --- a/stacks/monitoring/modules/monitoring/alloy.yaml +++ b/stacks/monitoring/modules/monitoring/alloy.yaml @@ -1,4 +1,18 @@ alloy: + # Resource limits for the alloy container itself. + # Must be under `alloy.resources` (NOT `controller.resources`) — the chart + # only maps THIS key onto the alloy container. Without it, the container gets + # `resources: {}` and inherits Kyverno LimitRange `tier-defaults` (256Mi), + # which is below Alloy's 400-450Mi steady state and caused page-cache + # thrashing → 185 MB/s sdc reads → host IO saturation (2026-05-26). + # Burstable QoS (request < limit) — workers are at 97-99% memory-request + # saturation; a 1Gi request blocks scheduling on node2/node3. + resources: + requests: + cpu: 50m + memory: 512Mi + limits: + memory: 1Gi configMap: content: |- // Write your Alloy config here: @@ -183,6 +197,14 @@ alloy: readOnly: true controller: + # Bump maxUnavailable above the chart default (1) so a 5-node DS finishes its + # rolling update inside the helm_release timeout. Log shipper tolerates the + # brief gap. + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 50% + volumes: extra: - name: journal-run @@ -206,13 +228,3 @@ controller: operator: "Exists" effect: "NoSchedule" - # Resource limits for DaemonSet pods - # Alloy tails logs from all containers on the node via K8s API and batches - # them to Loki. Memory scales with number of active log streams (~30-50 per node). - # 128Mi was OOMKilled; steady-state usage is ~400-450Mi per pod. - resources: - requests: - cpu: 50m - memory: 512Mi - limits: - memory: 1Gi diff --git a/stacks/monitoring/modules/monitoring/loki.tf b/stacks/monitoring/modules/monitoring/loki.tf index 72aa4da2..0b5f3d45 100644 --- a/stacks/monitoring/modules/monitoring/loki.tf +++ b/stacks/monitoring/modules/monitoring/loki.tf @@ -28,8 +28,9 @@ resource "helm_release" "alloy" { repository = "https://grafana.github.io/helm-charts" chart = "alloy" - values = [file("${path.module}/alloy.yaml")] - atomic = true + values = [file("${path.module}/alloy.yaml")] + atomic = true + timeout = 900 # 5-pod DS rolling update + occasional runc-stuck-Terminating on k8s-master needs >300s default depends_on = [helm_release.loki] }