infra/modules/kubernetes/anubis_instance/main.tf
Viktor Barzin 3da01e6e1e anubis: only challenge GET requests; allow everything else
PrivateBin's XHR `POST /` (paste creation) was the trigger — Anubis's
catch-all CHALLENGE rule served an HTML challenge page where the JS
expected JSON, breaking paste creation entirely. Same shape will hit
any SPA XHR or CORS preflight on the other 8 Anubis-fronted sites
(homepage actions, kms upload-then-poll, wrongmove search refresh,
jsoncrack share, etc.) the moment it gets exercised.

Add an `ALLOW` rule keyed on `method != "GET"` between the AI/UA-block
imports and the catch-all CHALLENGE. Rationale:

  * AI scrapers consume GET response bodies — they don't POST.
  * State-mutating XHRs and OPTIONS preflight need to bypass the
    challenge or the app breaks.
  * CrowdSec + per-route rate-limit + app-level auth already cover
    abuse on mutating methods, so this gives up nothing.
  * Hard-deny rules for known-bad bots run first, so a declared bad
    bot can't sneak through by sending a POST.

Also added a `checksum/policy` annotation on the Anubis pod template
sourced from `sha256(coalesce(var.policy_yaml, default_policy_yaml))`
so future policy changes auto-roll the deployment instead of needing
a manual `kubectl rollout restart`.

f1-stream had its own policy override (path carve-outs for SvelteKit
asset hashes and JSON data routes); mirrored the new rule there too.

Applied to all 8 Anubis-fronted stacks: blog, kms, f1-stream,
travel_blog, real-estate-crawler, homepage, cyberchef, jsoncrack.
Verified per stack: GET / returns the Anubis challenge page; POST,
PUT, DELETE, OPTIONS pass through to the backend (HTTP 301/405/502
from the upstream app, never the Anubis "not a bot" HTML).
2026-05-22 14:16:40 +00:00

423 lines
12 KiB
HCL

terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
}
}
}
# Per-site Anubis reverse proxy.
# Sits between Traefik and the real backend. On first visit, serves a
# proof-of-work challenge; on success, drops a long-lived JWT cookie and
# proxies the request through to `target_url`.
#
# Sharing a single ed25519 signing key across instances + COOKIE_DOMAIN at
# the registrable domain means a token solved on one viktorbarzin.me subdomain
# is honoured by every other Anubis-fronted site.
variable "name" {
type = string
description = "Short logical name (e.g. \"blog\"). Used to derive Service / Deployment / Secret names as anubis-<name>."
}
variable "namespace" {
type = string
description = "Namespace to deploy into — typically the same as the protected backend service."
}
variable "target_url" {
type = string
description = "Backend URL Anubis forwards passing requests to (e.g. http://blog.website.svc.cluster.local)."
}
variable "cookie_domain" {
type = string
default = "viktorbarzin.me"
description = "Cookie domain — set to the registrable domain so a single PoW solve covers every Anubis-fronted subdomain."
}
variable "difficulty" {
type = number
default = 2
description = "PoW difficulty (leading-zero hex chars). 2 = ~250ms desktop / ~700ms mobile. Bump for stronger filtering."
}
variable "cookie_expiration_hours" {
type = number
default = 720 # 30 days
description = "Lifetime of the issued JWT cookie in hours."
}
variable "image_tag" {
type = string
default = "v1.25.0"
description = "ghcr.io/techarohq/anubis tag — pin to a release, never :latest."
}
variable "replicas" {
type = number
default = 1
description = "Replica count. Default 1 because Anubis stores in-flight challenges in process memory — with N>1 a challenge issued by pod A and solved against pod B fails with `store: key not found` (HTTP 500). For HA, configure a shared store (Redis) and bump this. Per-pod 128Mi @ idle is cheap, single-pod restart is sub-second, so 1 is fine for content sites."
}
variable "memory" {
type = string
default = "128Mi"
description = "requests==limits memory. Anubis docs suggest 128Mi handles many concurrent clients."
}
variable "policy_yaml" {
type = string
default = null
description = "Override the strict default bot-policy YAML. Leave null to use the catch-all CHALLENGE policy."
}
variable "cpu_request" {
type = string
default = "20m"
description = "CPU request. PoW verification is server-cheap (just hash check)."
}
locals {
full_name = "anubis-${var.name}"
labels = {
"app" = local.full_name
"app.kubernetes.io/name" = "anubis"
"app.kubernetes.io/instance" = local.full_name
"app.kubernetes.io/component" = "ai-bot-challenge"
"app.kubernetes.io/managed-by" = "terraform"
}
# Strict bot policy. Default Anubis policy only WEIGHs Mozilla|Opera UAs
# and lets unmatched UAs (curl, wget, Python-requests, scrapy, headless
# CLI scrapers) fall through to ALLOW. We import the same upstream
# snippets and append a catch-all CHALLENGE so anyone without JS+PoW
# capability is filtered.
default_policy_yaml = <<-EOT
bots:
# Hard-deny known-bad bots first — runs before the method bypass so
# a declared bad bot can't sneak through by sending a POST.
- import: (data)/bots/_deny-pathological.yaml
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
# Hard-deny declared AI/LLM crawlers (ClaudeBot, GPTBot, Bytespider, …).
- import: (data)/meta/ai-block-aggressive.yaml
# Whitelist legitimate search-engine crawlers (Googlebot, Bingbot, …).
- import: (data)/crawlers/_allow-good.yaml
# Challenge Firefox AI previews specifically.
- import: (data)/clients/x-firefox-ai.yaml
# Allow /.well-known, /robots.txt, /favicon.*, /sitemap.xml — keeps
# the internet working for benign crawlers and discovery clients.
- import: (data)/common/keep-internet-working.yaml
# Allow every non-GET request through. Rationale: AI scrapers steal
# the body of GETs (page content) — they don't POST. State-mutating
# methods come from app XHRs (PrivateBin paste creation, Komga
# uploads, SPA actions) and CORS preflight (OPTIONS). Challenging
# those breaks the app, because the JS expects JSON and gets the
# Anubis HTML challenge page. CrowdSec + rate-limit + per-app auth
# already cover abuse on these methods.
- name: allow-non-get-methods
action: ALLOW
expression: method != "GET"
# Catch-all: every remaining (GET) request must solve the challenge.
# This closes the "unmatched UA falls through to ALLOW" gap that
# lets curl/wget/Python-requests scrape non-CDN-fronted hosts.
- name: catchall-challenge
path_regex: .*
action: CHALLENGE
EOT
}
# Bot policy ConfigMap. Mounted into the pod and referenced by POLICY_FNAME.
resource "kubernetes_config_map" "policy" {
metadata {
name = "${local.full_name}-policy"
namespace = var.namespace
labels = local.labels
}
data = {
"botPolicies.yaml" = coalesce(var.policy_yaml, local.default_policy_yaml)
}
}
# ED25519 signing key — pulled from Vault `secret/viktor` -> field
# `anubis_ed25519_key`. Same key across every instance so JWTs are
# cross-validatable, enabling cross-subdomain SSO.
resource "kubernetes_manifest" "ed25519_secret" {
manifest = {
apiVersion = "external-secrets.io/v1beta1"
kind = "ExternalSecret"
metadata = {
name = "${local.full_name}-key"
namespace = var.namespace
}
spec = {
refreshInterval = "1h"
secretStoreRef = {
name = "vault-kv"
kind = "ClusterSecretStore"
}
target = {
name = "${local.full_name}-key"
creationPolicy = "Owner"
}
data = [{
secretKey = "key"
remoteRef = {
key = "viktor"
property = "anubis_ed25519_key"
}
}]
}
}
}
resource "kubernetes_deployment" "anubis" {
metadata {
name = local.full_name
namespace = var.namespace
labels = local.labels
}
spec {
replicas = var.replicas
selector {
match_labels = { app = local.full_name }
}
strategy {
type = "RollingUpdate"
rolling_update {
max_surge = 1
max_unavailable = 0
}
}
template {
metadata {
labels = local.labels
annotations = {
# Roll the deployment whenever the policy YAML changes — Anubis
# reads the policy at startup, so a ConfigMap update alone
# doesn't take effect until pods restart.
"checksum/policy" = sha256(coalesce(var.policy_yaml, local.default_policy_yaml))
}
}
spec {
# Spread replicas across nodes to survive a single node failure.
topology_spread_constraint {
max_skew = 1
topology_key = "kubernetes.io/hostname"
when_unsatisfiable = "ScheduleAnyway"
label_selector {
match_labels = { app = local.full_name }
}
}
container {
name = "anubis"
image = "ghcr.io/techarohq/anubis:${var.image_tag}"
port {
name = "http"
container_port = 8923
}
port {
name = "metrics"
container_port = 9090
}
env {
name = "BIND"
value = ":8923"
}
env {
name = "METRICS_BIND"
value = ":9090"
}
env {
name = "TARGET"
value = var.target_url
}
env {
name = "DIFFICULTY"
value = tostring(var.difficulty)
}
env {
name = "COOKIE_EXPIRATION_TIME"
value = "${var.cookie_expiration_hours}h"
}
# Cross-subdomain SSO: cookie scoped to the registrable domain so
# a JWT solved on any Anubis-fronted subdomain is honoured on every
# other one. (COOKIE_DOMAIN and COOKIE_DYNAMIC_DOMAIN are mutually
# exclusive — picking the explicit form.)
env {
name = "COOKIE_DOMAIN"
value = var.cookie_domain
}
env {
name = "COOKIE_SECURE"
value = "true"
}
env {
name = "COOKIE_SAME_SITE"
value = "Lax"
}
# Built-in robots.txt that disallows known AI scrapers — well-behaved
# bots get blocked here without ever paying the PoW cost.
env {
name = "SERVE_ROBOTS_TXT"
value = "true"
}
# Drop cluster-internal IPs from XFF so Anubis sees the real client.
env {
name = "XFF_STRIP_PRIVATE"
value = "true"
}
env {
name = "SLOG_LEVEL"
value = "INFO"
}
env {
name = "ED25519_PRIVATE_KEY_HEX_FILE"
# Mounted from the ESO-managed Secret below.
value = "/keys/key"
}
env {
name = "POLICY_FNAME"
value = "/config/botPolicies.yaml"
}
volume_mount {
name = "ed25519-key"
mount_path = "/keys"
read_only = true
}
volume_mount {
name = "policy"
mount_path = "/config"
read_only = true
}
resources {
requests = {
cpu = var.cpu_request
memory = var.memory
}
limits = {
memory = var.memory
}
}
# Liveness + readiness on the metrics endpoint (zero auth, always 200).
liveness_probe {
http_get {
path = "/metrics"
port = "metrics"
}
initial_delay_seconds = 10
period_seconds = 30
failure_threshold = 3
}
readiness_probe {
http_get {
path = "/metrics"
port = "metrics"
}
initial_delay_seconds = 2
period_seconds = 5
failure_threshold = 2
}
security_context {
run_as_non_root = true
run_as_user = 1000
run_as_group = 1000
allow_privilege_escalation = false
read_only_root_filesystem = true
capabilities {
drop = ["ALL"]
}
}
}
volume {
name = "ed25519-key"
secret {
secret_name = "${local.full_name}-key"
items {
key = "key"
path = "key"
}
}
}
volume {
name = "policy"
config_map {
name = kubernetes_config_map.policy.metadata[0].name
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].template[0].spec[0].dns_config]
}
depends_on = [kubernetes_manifest.ed25519_secret]
}
resource "kubernetes_service" "anubis" {
metadata {
name = local.full_name
namespace = var.namespace
labels = local.labels
annotations = {
"prometheus.io/scrape" = "true"
"prometheus.io/path" = "/metrics"
"prometheus.io/port" = "9090"
}
}
spec {
selector = { app = local.full_name }
port {
name = "http"
port = 8080
target_port = 8923
protocol = "TCP"
}
port {
name = "metrics"
port = 9090
target_port = 9090
protocol = "TCP"
}
}
}
resource "kubernetes_pod_disruption_budget_v1" "anubis" {
metadata {
name = local.full_name
namespace = var.namespace
}
spec {
min_available = "1"
selector {
match_labels = { app = local.full_name }
}
}
}
output "service_name" {
value = kubernetes_service.anubis.metadata[0].name
description = "ClusterIP service name. Pass this to ingress_factory's `service_name` so Traefik routes through Anubis."
}
output "service_port" {
value = 8080
description = "Service port. Anubis listens on 8923 inside; the Service exposes 8080."
}