From 12fbc404ec59d80afa3851c2248682d6702114a7 Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sun, 10 May 2026 00:21:56 +0000
Subject: [PATCH] =?UTF-8?q?anubis:=20strict=20bot=20policy=20=E2=80=94=20c?=
 =?UTF-8?q?atch-all=20CHALLENGE=20for=20unmatched=20UAs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The default upstream policy only WEIGHs Mozilla|Opera UAs and lets
everything else (curl, wget, python-requests, scrapy, headless CLI
scrapers) fall through to the implicit ALLOW. On non-CDN-fronted hosts
(kms, anything dns_type=non-proxied) this meant a plain
`curl https://kms.viktorbarzin.me/` returned the real backend content
with no challenge — defeating the whole point of the "avoid casual
scrapers" intent.

Now the module ships a custom POLICY_FNAME mounted via ConfigMap:
- Imports the upstream deny-pathological / ai-block-aggressive /
  allow-good-crawlers / keep-internet-working snippets unchanged
- Adds a final `path_regex: .*` → action: CHALLENGE catch-all

Result: only IP-verified search engines (Googlebot from Google IPs,
Bingbot, etc.) and well-known paths (robots.txt, .well-known, favicon,
sitemap) skip the challenge. Everything else — including
spoofed-Googlebot-UA-from-random-IP — solves PoW or gets nothing.

Verified post-apply: curl default UA on viktorbarzin.me + kms + travel
returns the Anubis challenge HTML; /robots.txt still 200s straight
through.
---

Notes:
    Line structure of this patch was restored from a whitespace-collapsed
    copy. Hunk line counts match the @@ headers and the diffstat, but the
    indentation of context lines is reconstructed to `terraform fmt`
    conventions. If it does not apply cleanly, retry with
    `git am --ignore-whitespace` (or `git apply --ignore-whitespace`).

 modules/kubernetes/anubis_instance/main.tf | 60 ++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/modules/kubernetes/anubis_instance/main.tf b/modules/kubernetes/anubis_instance/main.tf
index 991da06a..c316921f 100644
--- a/modules/kubernetes/anubis_instance/main.tf
+++ b/modules/kubernetes/anubis_instance/main.tf
@@ -66,6 +66,12 @@ variable "memory" {
   description = "requests==limits memory. Anubis docs suggest 128Mi handles many concurrent clients."
 }
 
+variable "policy_yaml" {
+  type        = string
+  default     = null
+  description = "Override the strict default bot-policy YAML. Leave null to use the catch-all CHALLENGE policy."
+}
+
 variable "cpu_request" {
   type        = string
   default     = "20m"
@@ -81,6 +87,45 @@ locals {
     "app.kubernetes.io/component"  = "ai-bot-challenge"
     "app.kubernetes.io/managed-by" = "terraform"
   }
+
+  # Strict bot policy. Default Anubis policy only WEIGHs Mozilla|Opera UAs
+  # and lets unmatched UAs (curl, wget, Python-requests, scrapy, headless
+  # CLI scrapers) fall through to ALLOW. We import the same upstream
+  # snippets and append a catch-all CHALLENGE so anyone without JS+PoW
+  # capability is filtered.
+  default_policy_yaml = <<-EOT
+    bots:
+      # Hard-deny known-bad bots first.
+      - import: (data)/bots/_deny-pathological.yaml
+      - import: (data)/bots/aggressive-brazilian-scrapers.yaml
+      # Hard-deny declared AI/LLM crawlers (ClaudeBot, GPTBot, Bytespider, …).
+      - import: (data)/meta/ai-block-aggressive.yaml
+      # Whitelist legitimate search-engine crawlers (Googlebot, Bingbot, …).
+      - import: (data)/crawlers/_allow-good.yaml
+      # Challenge Firefox AI previews specifically.
+      - import: (data)/clients/x-firefox-ai.yaml
+      # Allow /.well-known, /robots.txt, /favicon.*, /sitemap.xml — keeps
+      # the internet working for benign crawlers and discovery clients.
+      - import: (data)/common/keep-internet-working.yaml
+      # Catch-all: every remaining request must solve the challenge. This
+      # closes the "unmatched UA falls through to ALLOW" gap that lets
+      # curl/wget/Python-requests scrape non-CDN-fronted hosts.
+      - name: catchall-challenge
+        path_regex: .*
+        action: CHALLENGE
+  EOT
+}
+
+# Bot policy ConfigMap. Mounted into the pod and referenced by POLICY_FNAME.
+resource "kubernetes_config_map" "policy" {
+  metadata {
+    name      = "${local.full_name}-policy"
+    namespace = var.namespace
+    labels    = local.labels
+  }
+  data = {
+    "botPolicies.yaml" = coalesce(var.policy_yaml, local.default_policy_yaml)
+  }
 }
 
 # ED25519 signing key — pulled from Vault `secret/viktor` -> field
@@ -222,12 +267,21 @@ resource "kubernetes_deployment" "anubis" {
             # Mounted from the ESO-managed Secret below.
             value = "/keys/key"
           }
+          env {
+            name  = "POLICY_FNAME"
+            value = "/config/botPolicies.yaml"
+          }
 
           volume_mount {
             name       = "ed25519-key"
             mount_path = "/keys"
             read_only  = true
           }
+          volume_mount {
+            name       = "policy"
+            mount_path = "/config"
+            read_only  = true
+          }
 
           resources {
             requests = {
@@ -281,6 +335,12 @@ resource "kubernetes_deployment" "anubis" {
             }
           }
         }
+        volume {
+          name = "policy"
+          config_map {
+            name = kubernetes_config_map.policy.metadata[0].name
+          }
+        }
       }
     }
   }