# =============================================================================
# CrowdSec edge enforcement for Cloudflare-PROXIED hosts — control plane
# =============================================================================
# Proxied hosts terminate at the Cloudflare edge, so the in-cluster CrowdSec
# bouncer (which keys on the real client IP seen by Traefik) never gets to
# decide on them. To enforce CrowdSec bans/captchas on proxied traffic we push
# the decision INTO the Cloudflare edge as account-level IP Lists + a single
# zone-scoped WAF custom ruleset:
#
#   * Two account IP Lists — `crowdsec_ban` and `crowdsec_captcha` — hold the
#     banned / captcha'd source IPs (empty in TF; populated at runtime).
#   * A zone-scoped WAF ruleset in the http_request_firewall_custom phase
#     blocks `(ip.src in $crowdsec_ban)` and managed-challenges
#     `(ip.src in $crowdsec_captcha)`. Because it's a ZONE ruleset it enforces
#     across ALL proxied hosts in the zone (~135), not just the handful a
#     Worker would route. (The previous Worker+KV design only covered the ~27
#     hosts the rybbit Worker routed; the analytics Worker in worker/ is
#     unrelated and stays.)
#
# This file is the CONTROL PLANE that keeps those lists in sync with LAPI:
#   1. the two empty IP Lists (list ITEMS are owned by the CronJob at runtime,
#      NOT by Terraform — see the lifecycle ignore_changes on `item`),
#   2. a LEAST-PRIVILEGE Cloudflare API token (account-level list read/write
#      only, scoped to this account) the sync job authenticates with,
#   3. a CronJob running lapi_kv_sync.py every 2 min to full-reconcile LAPI
#      decisions into the two lists (mirrors monitoring/alert_digest.tf: stock
#      python:3.12-alpine + pure-stdlib script from a ConfigMap, no pip/apk at
#      runtime).
#
# Provider note: the Cloudflare provider is pinned at v4.52.7 (~> 4), so the v4
# schema is used throughout. v5 differs greatly:
#   - `policy` is a block here, not a `policies = [...]` list;
#   - `resources` is a map, not a jsonencode'd string;
#   - ruleset `rules` is a repeatable block;
#   - list items use `item { value { ip = ... } }`;
#   - permission groups are looked up via
#     data.cloudflare_api_token_permission_groups, not a v5 *_list data source.
# context7 only indexes v5, so the v4 arguments below were verified against the
# v4.52.7 provider docs (github tag v4.52.7). Items FLAGGED ### VERIFY for
# tg-plan are noted inline.
# =============================================================================

# Account lookup: the account id used everywhere below is the id of the first
# entry returned by this data source.
data "cloudflare_accounts" "main" {}

locals {
  cf_account_id = data.cloudflare_accounts.main.accounts[0].id
}

# -----------------------------------------------------------------------------
# IP Lists — declared as empty shells only.
#
# Items are written at runtime by the CronJob through the CF Rules-Lists API.
# Terraform must NOT manage them: otherwise the 2-minute sync and the next
# `terragrunt apply` would fight each other (apply would try to delete the
# runtime items).
#
# ### VERIFY (v4.52.7): cloudflare_list takes account_id/name/kind/description;
#     kind="ip" is one of {ip, redirect, hostname, asn}. The optional items
#     block is named `item` (singular, Block Set) with
#     `item { value { ip=... } comment=... }`. NO `item` blocks are declared
#     (empty list) and ignore_changes=[item] keeps runtime items from showing
#     up as drift.
#     NOTE: list `name` must match /^[a-zA-Z0-9_]+$/ (underscores ok, no
#     dashes) — hence crowdsec_ban / crowdsec_captcha (underscore, not dash).
# -----------------------------------------------------------------------------

# Ban list: source IPs referenced by the WAF expression as $crowdsec_ban.
resource "cloudflare_list" "crowdsec_ban" {
  name        = "crowdsec_ban"
  kind        = "ip"
  description = "CrowdSec ban decisions (synced from LAPI)"
  account_id  = local.cf_account_id

  lifecycle {
    # Items are added/removed at runtime by the crowdsec-cf-sync CronJob; TF
    # owns only the empty list shell. Dropping this would make every apply
    # delete the live bans.
    ignore_changes = [item]
  }
}

# Captcha list: source IPs referenced by the WAF expression as
# $crowdsec_captcha. Items are runtime-owned, same as the ban list.
resource "cloudflare_list" "crowdsec_captcha" {
  name        = "crowdsec_captcha"
  kind        = "ip"
  description = "CrowdSec captcha decisions (synced from LAPI)"
  account_id  = local.cf_account_id

  lifecycle {
    ignore_changes = [item]
  }
}

# -----------------------------------------------------------------------------
# Zone-scoped WAF custom ruleset — the actual enforcement. One ruleset whose
# CrowdSec rules apply to EVERY proxied host in the zone.
#
# ### VERIFY (v4.52.7): cloudflare_ruleset with zone_id + kind="zone" +
#     phase="http_request_firewall_custom"; `rules` is a repeatable block with
#     action/expression/description/enabled. Actions "block" and
#     "managed_challenge" are both valid. List references in WAF expressions
#     use the list NAME with a `$` prefix (NOT the list id): ($crowdsec_ban).
#     Rule order matters — ban (block) is evaluated before captcha so a
#     double-listed IP is blocked outright (the sync script also enforces
#     ban-wins, so an IP is never in both lists, but order is
#     belt-and-braces).
#
# zone_id is the viktorbarzin.me zone — the single zone id used repo-wide
# (default of var.cloudflare_zone_id in modules/kubernetes/ingress_factory and
# hardcoded the same in stacks/kms/main.tf; source of truth is the git-crypt'd
# config.tfvars). It is hardcoded here (with the conventional marker comment)
# because the rybbit stack does not import the ingress_factory module.
# -----------------------------------------------------------------------------

# Cloudflare allows only ONE entrypoint ruleset per zone+phase, and the zone
# already has the stock `default` http_request_firewall_custom ruleset (created
# out-of-band, id 106a1342bc88454ea59c47ad3431fe0e). Creating a second one
# fails the singleton constraint, so the existing ruleset is IMPORTED and all
# of its rules are managed here: the CrowdSec ban/captcha rules FIRST, and the
# pre-existing (currently disabled) skip rule preserved verbatim below them.
# Adopt the pre-existing zone entrypoint ruleset into state instead of creating
# a new one.
import {
  to = cloudflare_ruleset.crowdsec

  # The v4 provider expects a three-segment import id for rulesets:
  #   zone/<zone_id>/<ruleset_id>      (zone-scoped)
  #   account/<account_id>/<ruleset_id> (account-scoped)
  # A bare "<zone_id>/<ruleset_id>" is not a valid import identifier.
  id = "zone/fd2c5dd4efe8fe38958944e74d0ced6d/106a1342bc88454ea59c47ad3431fe0e"
}

# The zone's single http_request_firewall_custom entrypoint ruleset. Rules are
# evaluated in declaration order, so do not reorder the `rules` blocks.
resource "cloudflare_ruleset" "crowdsec" {
  zone_id = "fd2c5dd4efe8fe38958944e74d0ced6d" # cloudflare_zone_id (viktorbarzin.me)
  name    = "default"
  kind    = "zone"
  phase   = "http_request_firewall_custom"

  # CrowdSec ban — evaluated FIRST so a banned IP is blocked before anything else.
  rules {
    action      = "block"
    expression  = "(ip.src in $crowdsec_ban)"
    description = "CrowdSec: block banned IPs"
    enabled     = true
  }

  # CrowdSec captcha — managed challenge for flagged IPs.
  rules {
    action      = "managed_challenge"
    expression  = "(ip.src in $crowdsec_captcha)"
    description = "CrowdSec: challenge flagged IPs"
    enabled     = true
  }

  # Pre-existing rule, imported and preserved verbatim (currently disabled).
  rules {
    action      = "skip"
    expression  = "(http.host contains \"viktorbarzin.me\")"
    description = "skip"
    enabled     = false

    action_parameters {
      phases   = ["http_ratelimit", "http_request_firewall_managed", "http_request_sbfm"]
      products = ["uaBlock", "bic", "hot", "securityLevel", "rateLimit", "waf", "zoneLockdown"]
      ruleset  = "current"
    }
  }
}

# -----------------------------------------------------------------------------
# Least-privilege API token for the sync job: account-level Filter-Lists edit
# ONLY, scoped to this single account (no zone/DNS/Workers access). The token
# value is sensitive and lands in TF state (Tier-1 PG, encrypted at rest) and
# in the rybbit Secret below — same trust level as the CF Global API Key
# already in state.
#
# ### VERIFY (v4.52.7): cloudflare_api_token with a repeatable `policy` block
#     (effect / permission_groups = Set of String / resources = Map of String);
#     token secret is exposed as `.value` (sensitive).
#
# ### VERIFY — PERMISSION GROUP NAME (highest-risk item). v4.52.7 deprecates
#     the flat `.permissions[...]` map ("some permissions overlap resource
#     scope"); the non-deprecated lookup is the scoped `.account[...]` map.
#     Cloudflare's current permissions reference calls the account list-edit
#     group "Account Filter Lists Edit" (and read "Account Filter Lists Read").
#     An OLDER community gist instead shows "Account Rule Lists Read/Write" —
#     Cloudflare has renamed this group over time. If `tg plan` errors with a
#     missing key, try (in order): .account["Account Filter Lists Edit"] ->
#     .account["Account Rule Lists Write"], or enumerate the live names with:
#         terraform console
#         > data.cloudflare_api_token_permission_groups.all.account
#     Read is not strictly required for edit (Edit = full CRUDL) but the sync
#     job GETs items, so we include Read too to be safe.
#     NOTE: the policy below currently looks up the "Account Rule Lists" names.
# -----------------------------------------------------------------------------

# Catalogue of permission groups; only the account-scoped map is read below.
data "cloudflare_api_token_permission_groups" "all" {}

# Token minted for the sync job. Its `.value` is copied into the
# crowdsec-cf-sync Secret.
resource "cloudflare_api_token" "list_sync" {
  name = "rybbit-crowdsec-list-sync"

  policy {
    # Scope: this one account only.
    resources = {
      "com.cloudflare.api.account.${local.cf_account_id}" = "*"
    }

    # Grants: list write + list read, nothing else.
    permission_groups = [
      data.cloudflare_api_token_permission_groups.all.account["Account Rule Lists Write"],
      data.cloudflare_api_token_permission_groups.all.account["Account Rule Lists Read"],
    ]

    effect = "allow"
  }
}

# -----------------------------------------------------------------------------
# The sync script itself: pure stdlib, shipped to the CronJob via a ConfigMap
# (the alert_digest pattern — no per-run package installs).
# -----------------------------------------------------------------------------
resource "kubernetes_config_map" "crowdsec_cf_sync_script" {
  metadata {
    namespace = "rybbit"
    name      = "crowdsec-cf-sync-script"
  }

  # Key = file name as it appears under the CronJob's /scripts mount.
  data = {
    "lapi_kv_sync.py" = file("${path.module}/lapi_kv_sync.py")
  }
}

# Secrets consumed by the sync job: the LAPI bouncer key (registered in LAPI,
# stored in Vault secret/platform -> kvsync_bouncer_key) and the minted CF
# token value. Account id and list ids are NOT secret and are passed as plain
# env values on the CronJob.
# Credentials for the sync job: the LAPI bouncer key read from Vault and the
# value of the Cloudflare API token minted in this file.
resource "kubernetes_secret" "crowdsec_cf_sync" {
  type = "Opaque"

  metadata {
    namespace = "rybbit"
    name      = "crowdsec-cf-sync"
  }

  data = {
    CF_API_TOKEN = cloudflare_api_token.list_sync.value
    LAPI_KEY     = data.vault_kv_secret_v2.cf_platform.data["kvsync_bouncer_key"]
  }
}

# CronJob that runs /scripts/lapi_kv_sync.py in a stock python:3.12-alpine
# container every 2 minutes. The script comes from the ConfigMap mount and the
# credentials from the Secret above.
resource "kubernetes_cron_job_v1" "crowdsec_cf_sync" {
  metadata {
    namespace = "rybbit"
    name      = "crowdsec-cf-sync"

    labels = {
      tier = local.tiers.aux
      app  = "crowdsec-cf-sync"
    }
  }

  spec {
    schedule = "*/2 * * * *"

    # 110s is just under the 2-minute schedule period.
    starting_deadline_seconds = 110

    # Do not start a new run while the previous one is still active.
    concurrency_policy = "Forbid"

    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 3

    job_template {
      metadata {}

      spec {
        ttl_seconds_after_finished = 3600
        backoff_limit              = 2

        template {
          metadata {
            labels = {
              app = "crowdsec-cf-sync"
            }
          }

          spec {
            restart_policy = "OnFailure"

            container {
              name              = "crowdsec-cf-sync"
              image             = "docker.io/library/python:3.12-alpine"
              image_pull_policy = "IfNotPresent"
              command           = ["python3", "/scripts/lapi_kv_sync.py"]

              # --- secret-backed settings ---
              env {
                name = "LAPI_KEY"
                value_from {
                  secret_key_ref {
                    key  = "LAPI_KEY"
                    name = kubernetes_secret.crowdsec_cf_sync.metadata[0].name
                  }
                }
              }

              env {
                name = "CF_API_TOKEN"
                value_from {
                  secret_key_ref {
                    key  = "CF_API_TOKEN"
                    name = kubernetes_secret.crowdsec_cf_sync.metadata[0].name
                  }
                }
              }

              # --- plain (non-secret) settings ---
              env {
                name  = "CF_ACCOUNT_ID"
                value = local.cf_account_id
              }

              env {
                name  = "CF_BAN_LIST_ID"
                value = cloudflare_list.crowdsec_ban.id
              }

              env {
                name  = "CF_CAPTCHA_LIST_ID"
                value = cloudflare_list.crowdsec_captcha.id
              }

              env {
                name  = "PUSHGATEWAY_URL"
                value = "http://prometheus-prometheus-pushgateway.monitoring:9091"
              }

              # The sync script is mounted read-only from the ConfigMap volume.
              volume_mount {
                name       = "script"
                mount_path = "/scripts"
                read_only  = true
              }

              resources {
                limits = {
                  memory = "96Mi"
                }
                requests = {
                  cpu    = "10m"
                  memory = "48Mi"
                }
              }
            }

            volume {
              name = "script"

              config_map {
                name = kubernetes_config_map.crowdsec_cf_sync_script.metadata[0].name
              }
            }

            dns_config {
              option {
                name  = "ndots"
                value = "2"
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [
      spec[0].job_template[0].spec[0].template[0].spec[0].dns_config,
    ]
  }
}