From 41786b0fcab8ac914389e19fa32b480d73f6fbd5 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 24 May 2026 10:31:03 +0000 Subject: [PATCH] =?UTF-8?q?crowdsec:=20DISABLE=5FONLINE=5FAPI=3Dtrue=20?= =?UTF-8?q?=E2=80=94=20break=20the=20recurring=20403=20crashloop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CAPI auth at api.crowdsec.net is rejecting watcher logins from inside the cluster within ~1h of registration, even after rotating creds via `cscli capi register`. The same login successfully authenticates from devvm but fails from cluster pods → IP-throttle or account-state issue at the central API. Until that's resolved with CrowdSec support (or the throttle window resets), running with CAPI on is just chronic crashloops on every fresh replica. `DISABLE_ONLINE_API=true` makes the chart entrypoint `conf_set 'del(.api.server.online_client)'`, removing the online_client block entirely. Pods skip CAPI auth, no 403, no crashloop. Trade-off: no community blocklists. Local scenarios + bouncers continue unchanged. Side-effect of disabling CAPI in this chart (v0.21.0) — `role.yaml` is gated on `IsOnlineAPIDisabled=false` while `cscli-lapi-register-job` is gated on `StoreLAPICscliCredentialsInSecret=true` (orthogonal). So the hook runs without the Role it needs, and atomic apply rolls back. Mitigation: pre-created the `crowdsec-lapi-cscli-credentials` Secret manually (the hook short-circuits when the secret already exists) and re-applied the missing Role for future re-enablement. Re-enable path documented in the comment block. 
Co-Authored-By: Claude Opus 4.7 --- stacks/crowdsec/modules/crowdsec/values.yaml | 38 +++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/stacks/crowdsec/modules/crowdsec/values.yaml b/stacks/crowdsec/modules/crowdsec/values.yaml index 452caab8..f43589fd 100644 --- a/stacks/crowdsec/modules/crowdsec/values.yaml +++ b/stacks/crowdsec/modules/crowdsec/values.yaml @@ -98,20 +98,40 @@ lapi: data: enabled: false env: - - name: ENROLL_KEY - value: "${ENROLL_KEY}" - - name: ENROLL_INSTANCE_NAME - value: "k8s-cluster" - - name: ENROLL_TAGS - value: "k8s linux" + # CAPI disabled 2026-05-24 — TEMPORARY MEASURE. + # + # Symptom: every fresh LAPI replica hit 403 Forbidden on CAPI watcher + # auth at api.crowdsec.net startup → fatal → CrashLoopBackOff. + # Tried `cscli capi register` to rotate creds (worked briefly: the + # newly-registered login `486abb15…` succeeded for ~1 hour from + # inside the cluster, then started returning 403 again). Live probe + # of the same login from devvm STILL works HTTP 200 — looks like + # api.crowdsec.net is throttling or IP-blocking the cluster's + # egress IP (Cloudflare tunnel / PVE NAT) for new sessions. + # + # DISABLE_ONLINE_API=true makes the chart entrypoint + # `conf_set 'del(.api.server.online_client)'` → no CAPI auth call + # at startup → no 403 → pod starts. Also short-circuits the + # `cscli console enroll` step that was the older crashloop trigger + # (per memory id=2606-2613 the enroll key is single-shot too). + # + # Trade-off: no community blocklists from CAPI feeds. Local + # scenarios + bouncers continue unchanged. + # + # Re-enable path (when CrowdSec central cooperates again): + # 1. Generate a fresh enroll key at app.crowdsec.net + # 2. Re-run `cscli capi register -f /tmp/c.yaml` from a pod and + # update `crowdsec-capi-credentials` Secret + # 3. Verify the new login authenticates from INSIDE the cluster + # for at least an hour (not just from devvm) + # 4. 
Remove DISABLE_ONLINE_API and restore the ENROLL_KEY, ENROLL_INSTANCE_NAME and ENROLL_TAGS env entries + - name: DISABLE_ONLINE_API + value: "true" - name: DB_PASSWORD valueFrom: secretKeyRef: name: crowdsec-lapi-secrets key: dbPassword - # As it's a test, we don't want to share signals with CrowdSec, so disable the Online API. - # - name: DISABLE_ONLINE_API - # value: "true" dashboard: enabled: true env: