From 882df4cc5c8a21820397bcf44325ecce4cc232ef Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 24 Feb 2026 23:00:45 +0000 Subject: [PATCH] =?UTF-8?q?[ci=20skip]=20kyverno:=20fix=20crash=20loop=20?= =?UTF-8?q?=E2=80=94=20failurePolicy=20Ignore,=20increase=20memory,=20pin?= =?UTF-8?q?=20chart?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Admission controller was restarting every ~5min due to API server timeouts causing leader election loss. failurePolicy:Fail meant the webhook blocked all pod creation cluster-wide when Kyverno was unavailable. --- stacks/platform/modules/kyverno/main.tf | 39 ++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/stacks/platform/modules/kyverno/main.tf b/stacks/platform/modules/kyverno/main.tf index eb50274b..ecfde9e7 100644 --- a/stacks/platform/modules/kyverno/main.tf +++ b/stacks/platform/modules/kyverno/main.tf @@ -16,8 +16,45 @@ resource "helm_release" "kyverno" { repository = "https://kyverno.github.io/kyverno/" chart = "kyverno" + version = "3.6.1" - # values = [templatefile("${path.module}/grafana_chart_values.yaml", { db_password = var.grafana_db_password })] + values = [yamlencode({ + # When Kyverno is unavailable, allow pod creation to proceed without + # mutation/validation rather than blocking all admissions cluster-wide. + features = { + forceFailurePolicyIgnore = { + enabled = true + } + } + + admissionController = { + container = { + resources = { + limits = { + memory = "768Mi" + } + requests = { + cpu = "100m" + memory = "128Mi" + } + } + } + + # More tolerant liveness probe — API server slowness shouldn't kill the pod + livenessProbe = { + httpGet = { + path = "/health/liveness" + port = 9443 + scheme = "HTTPS" + } + initialDelaySeconds = 15 + periodSeconds = 30 + timeoutSeconds = 5 + failureThreshold = 4 + successThreshold = 1 + } + } + })] } # To unlabel all: