From 6139052104411f4b537aadea0ce50df3bd6e3cfe Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Mar 2026 02:36:53 +0000 Subject: [PATCH] [ci skip] add graceful degradation to CrowdSec bouncer middleware P0: Set updateMaxFailure=-1 (fail-open) Previously defaulted to 0 which blocked ALL traffic on first LAPI failure. Now serves from cached decisions when LAPI is unreachable. P1: Enable Redis cache for CrowdSec decisions Decisions are now shared across all 3 Traefik replicas and survive pod restarts. redisCacheUnreachableBlock=false prevents Redis from becoming another SPOF. P1: Add clientTrustedIPs for internal cluster traffic Node CIDR (10.0.20.0/24) and pod CIDR (10.10.0.0/16) bypass CrowdSec entirely, preventing internal cascade failures. --- stacks/platform/main.tf | 1 + stacks/platform/modules/traefik/main.tf | 1 + stacks/platform/modules/traefik/middleware.tf | 11 ++++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/stacks/platform/main.tf b/stacks/platform/main.tf index 7f6cd28b..5f5c4e3b 100644 --- a/stacks/platform/main.tf +++ b/stacks/platform/main.tf @@ -162,6 +162,7 @@ module "traefik" { source = "./modules/traefik" tier = local.tiers.core crowdsec_api_key = var.ingress_crowdsec_api_key + redis_host = var.redis_host tls_secret_name = var.tls_secret_name } diff --git a/stacks/platform/modules/traefik/main.tf b/stacks/platform/modules/traefik/main.tf index d7a9da5a..ccd1cbce 100644 --- a/stacks/platform/modules/traefik/main.tf +++ b/stacks/platform/modules/traefik/main.tf @@ -1,5 +1,6 @@ variable "tier" { type = string } variable "crowdsec_api_key" { type = string } +variable "redis_host" { type = string } variable "tls_secret_name" {} resource "kubernetes_namespace" "traefik" { diff --git a/stacks/platform/modules/traefik/middleware.tf b/stacks/platform/modules/traefik/middleware.tf index 0a5481b6..499a11fe 100644 --- a/stacks/platform/modules/traefik/middleware.tf +++ b/stacks/platform/modules/traefik/middleware.tf @@ -150,9 +150,14 @@ resource "kubernetes_manifest" "middleware_crowdsec" { spec = { plugin = { crowdsec-bouncer = { - crowdsecLapiKey = var.crowdsec_api_key - crowdsecLapiHost = "crowdsec-service.crowdsec.svc.cluster.local:8080" - crowdsecMode = "stream" + crowdsecLapiKey = var.crowdsec_api_key + crowdsecLapiHost = "crowdsec-service.crowdsec.svc.cluster.local:8080" + crowdsecMode = "stream" + updateMaxFailure = -1 # fail-open: serve from cache when LAPI is unreachable + redisCacheEnabled = true + redisCacheHost = var.redis_host + redisCacheUnreachableBlock = false # don't block traffic if Redis is also unreachable + clientTrustedIPs = ["10.0.20.0/24", "10.10.0.0/16"] # node + pod CIDRs bypass CrowdSec } } }