anubis: strict bot policy — catch-all CHALLENGE for unmatched UAs

The default upstream policy only WEIGHs Mozilla|Opera UAs and lets everything else (curl, wget, python-requests, scrapy, headless CLI scrapers) fall through to the implicit ALLOW. On non-CDN-fronted hosts (kms, or anything with dns_type=non-proxied) this meant a plain `curl https://kms.viktorbarzin.me/` returned the real backend content with no challenge — defeating the whole point of the "avoid casual scrapers" intent.

Now the module ships a custom policy file, mounted via ConfigMap and selected with POLICY_FNAME, which:
- Imports the upstream deny-pathological / aggressive-brazilian-scrapers / ai-block-aggressive / allow-good-crawlers / x-firefox-ai / keep-internet-working snippets unchanged
- Adds a final `path_regex: .*` → `action: CHALLENGE` catch-all

Result: only IP-verified search engines (Googlebot from Google IPs, Bingbot, etc.) and well-known paths (robots.txt, .well-known, favicon, sitemap) skip the challenge. Everything else — including a spoofed Googlebot UA from a random IP — solves PoW or gets nothing.

Verified post-apply: curl with its default UA against viktorbarzin.me + kms + travel returns the Anubis challenge HTML; /robots.txt still 200s straight through.
2026-05-10 00:21:56 +00:00 · 2026-05-10 00:21:56 +00:00 · 12fbc404ec
commit 12fbc404ec
parent c73cd26a73
1 changed files with 60 additions and 0 deletions
--- a/modules/kubernetes/anubis_instance/main.tf
+++ b/modules/kubernetes/anubis_instance/main.tf
@ -66,6 +66,12 @@ variable "memory" {
  description = "requests==limits memory. Anubis docs suggest 128Mi handles many concurrent clients."
 }

+# Optional caller-supplied Anubis bot policy (raw YAML text). When left null
+# (or empty) the module falls back to local.default_policy_yaml via coalesce()
+# in the policy ConfigMap.
+variable "policy_yaml" {
+  type        = string
+  default     = null
+  description = "Override the strict default bot-policy YAML. Leave null to use the catch-all CHALLENGE policy."
+
+  # Catch malformed overrides at plan time rather than when the pod tries to
+  # load the mounted file. coalesce() maps null/"" to an empty YAML document
+  # so the "use the default" case always passes.
+  validation {
+    condition     = can(yamldecode(coalesce(var.policy_yaml, "{}")))
+    error_message = "The policy_yaml value must be valid YAML, or null to use the built-in strict policy."
+  }
+}
+
 variable "cpu_request" {
  type        = string
  default     = "20m"
@ -81,6 +87,45 @@ locals {
    "app.kubernetes.io/component"  = "ai-bot-challenge"
    "app.kubernetes.io/managed-by" = "terraform"
  }
+
+  # Strict bot policy. Default Anubis policy only WEIGHs Mozilla|Opera UAs
+  # and lets unmatched UAs (curl, wget, Python-requests, scrapy, headless
+  # CLI scrapers) fall through to ALLOW. We import the same upstream
+  # snippets and append a catch-all CHALLENGE so anyone without JS+PoW
+  # capability is filtered.
+  default_policy_yaml = <<-EOT
+    bots:
+      # Hard-deny known-bad bots first.
+      - import: (data)/bots/_deny-pathological.yaml
+      - import: (data)/bots/aggressive-brazilian-scrapers.yaml
+      # Hard-deny declared AI/LLM crawlers (ClaudeBot, GPTBot, Bytespider, …).
+      - import: (data)/meta/ai-block-aggressive.yaml
+      # Whitelist legitimate search-engine crawlers (Googlebot, Bingbot, …).
+      - import: (data)/crawlers/_allow-good.yaml
+      # Challenge Firefox AI previews specifically.
+      - import: (data)/clients/x-firefox-ai.yaml
+      # Allow /.well-known, /robots.txt, /favicon.*, /sitemap.xml — keeps
+      # the internet working for benign crawlers and discovery clients.
+      - import: (data)/common/keep-internet-working.yaml
+      # Catch-all: every remaining request must solve the challenge. This
+      # closes the "unmatched UA falls through to ALLOW" gap that lets
+      # curl/wget/Python-requests scrape non-CDN-fronted hosts.
+      - name: catchall-challenge
+        path_regex: .*
+        action: CHALLENGE
+  EOT
+}
+
+# Bot policy ConfigMap. Mounted into the pod at /config and referenced by
+# POLICY_FNAME.
+#
+# The name carries a short hash of the rendered policy. With a fixed name a
+# policy edit only updates the ConfigMap in place: the Deployment spec does
+# not change, so no rollout is triggered. Hashing the content into the name
+# changes the Deployment's volume reference on every policy edit and rolls
+# the pods onto the new file.
+# NOTE(review): presumably Anubis only reads POLICY_FNAME at startup — confirm.
+resource "kubernetes_config_map" "policy" {
+  metadata {
+    name      = "${local.full_name}-policy-${substr(sha256(coalesce(var.policy_yaml, local.default_policy_yaml)), 0, 8)}"
+    namespace = var.namespace
+    labels    = local.labels
+  }
+  data = {
+    # var.policy_yaml wins when set; null (or empty) selects the strict default.
+    "botPolicies.yaml" = coalesce(var.policy_yaml, local.default_policy_yaml)
+  }
+
+  # A name change forces replacement. Create the new ConfigMap before the old
+  # one is deleted so running pods never reference a missing ConfigMap.
+  lifecycle {
+    create_before_destroy = true
+  }
 }

 # ED25519 signing key — pulled from Vault `secret/viktor` -> field
@ -222,12 +267,21 @@ resource "kubernetes_deployment" "anubis" {
            # Mounted from the ESO-managed Secret below.
            value = "/keys/key"
          }
+          env {
+            name  = "POLICY_FNAME"
+            value = "/config/botPolicies.yaml"
+          }

          volume_mount {
            name       = "ed25519-key"
            mount_path = "/keys"
            read_only  = true
          }
+          volume_mount {
+            name       = "policy"
+            mount_path = "/config"
+            read_only  = true
+          }

          resources {
            requests = {
@ -281,6 +335,12 @@ resource "kubernetes_deployment" "anubis" {
            }
          }
        }
+        volume {
+          name = "policy"
+          config_map {
+            name = kubernetes_config_map.policy.metadata[0].name
+          }
+        }
      }
    }
  }