From 31f3fc0773c7b60daa09e4037da77e737cfafd5c Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 2 Mar 2026 21:39:14 +0000 Subject: [PATCH] [ci skip] fix OOMKill: prometheus (4Gi), kyverno-reports (512Mi), grampsweb (512Mi) - Prometheus server: explicit 1Gi req / 4Gi limit (was inheriting 512Mi LimitRange default) - Kyverno reports controller: 128Mi req / 512Mi limit (was 128Mi Helm default) - Grampsweb: 256Mi req / 512Mi limit for both containers (was 256Mi LimitRange default) --- stacks/grampsweb/main.tf | 22 +++++++++++++++++++ stacks/platform/modules/kyverno/main.tf | 12 ++++++++++ .../monitoring/prometheus_chart_values.tpl | 7 ++++++ 3 files changed, 41 insertions(+) diff --git a/stacks/grampsweb/main.tf b/stacks/grampsweb/main.tf index 7b7b972d..5d8799f1 100644 --- a/stacks/grampsweb/main.tf +++ b/stacks/grampsweb/main.tf @@ -182,6 +182,17 @@ resource "kubernetes_deployment" "grampsweb" { mount_path = "/tmp" sub_path = "tmp" } + + resources { + requests = { + cpu = "50m" + memory = "256Mi" + } + limits = { + cpu = "500m" + memory = "512Mi" + } + } } container { @@ -237,6 +248,17 @@ resource "kubernetes_deployment" "grampsweb" { mount_path = "/tmp" sub_path = "tmp" } + + resources { + requests = { + cpu = "50m" + memory = "256Mi" + } + limits = { + cpu = "500m" + memory = "512Mi" + } + } } volume { diff --git a/stacks/platform/modules/kyverno/main.tf b/stacks/platform/modules/kyverno/main.tf index 801db91a..67ca1769 100644 --- a/stacks/platform/modules/kyverno/main.tf +++ b/stacks/platform/modules/kyverno/main.tf @@ -27,6 +27,18 @@ resource "helm_release" "kyverno" { } } + reportsController = { + resources = { + limits = { + memory = "512Mi" + } + requests = { + cpu = "100m" + memory = "128Mi" + } + } + } + admissionController = { replicas = 2 diff --git a/stacks/platform/modules/monitoring/prometheus_chart_values.tpl b/stacks/platform/modules/monitoring/prometheus_chart_values.tpl index 6f876842..dfbc67a8 100755 --- a/stacks/platform/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/platform/modules/monitoring/prometheus_chart_values.tpl @@ -121,6 +121,13 @@ server: existingClaim: prometheus-iscsi-pvc # storageClass: rook-cephfs retention: "52w" + resources: + requests: + cpu: 200m + memory: 1Gi + limits: + cpu: "2" + memory: 4Gi strategy: type: Recreate baseURL: "https://prometheus.viktorbarzin.me"