anubis: HA with shared valkey/redis store + replicas=2
Before 2026-05-16 Anubis ran at replicas=1 because in-flight PoW challenge
state lived in process memory — a challenge issued by pod A could not be
verified by pod B (HTTP 500 "store: key not found"). The PDB at
`minAvailable=1` made the single-replica setup worse: with replicas=1 the
eviction API can NEVER satisfy the constraint, so every drain of a node
hosting an Anubis pod looped forever. This is what stalled the manual K8s
upgrade on 2026-05-11 (pods had to be deleted directly to bypass
eviction) and was about to block kured on Monday 2026-05-18 once the
kured sentinel fix landed.
Anubis upstream has first-class support for a Valkey/Redis-protocol
shared store (documented as the "Kubernetes worker pool" pattern).
Wire it up:
- modules/kubernetes/anubis_instance: add `shared_store_url` variable.
When set, the module appends a
`store: { backend: valkey, parameters: { url } }` block to the rendered
policy YAML and defaults replicas to 2 (capped at 2). The PDB switches
from `minAvailable=1` to `maxUnavailable=1` so drains can take down one
pod at a time. The topologySpreadConstraint is tightened to
`whenUnsatisfiable: DoNotSchedule` so the two replicas land on different
nodes — a single node loss never takes a whole Anubis instance down.
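- All 8 call sites (cyberchef, jsoncrack, kms, homepage, blog,
travel_blog, real-estate-crawler, f1-stream) opted in. Each picks a
unique Redis DB index (5–12) on `redis-master.redis:6379`:
cyberchef=5, f1-stream=6, jsoncrack=7, kms=8, homepage=9, blog=10,
travel_blog=11, real-estate-crawler=12. Cluster Redis already runs HA
via Sentinel + haproxy, so no new infra is needed.
- real-estate-crawler (unrelated drive-by in the same file): celery
worker memory request/limit bumped from 512Mi to 1Gi after an OOM
during a full scrape.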
Verified: every Anubis Deployment now 2/2 Ready with pods on different
nodes; PDBs allow 1 disruption; Redis DBs 5,7,8,10 already populated
by live traffic post-apply; Palo Alto Networks scanner hit blog right
after apply and the challenge log shows the new state path.
Drain on any worker now succeeds without a `predrain_unstick` workaround
— eviction API is satisfied because at most one pod is unavailable at a
time, and the other replica keeps serving. Monday's kured reboot wave
should roll through cleanly.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
cf5b169cbb
commit
6e920f96af
9 changed files with 103 additions and 42 deletions
|
|
@ -116,10 +116,11 @@ resource "kubernetes_service" "blog" {
|
|||
# tiny PoW (~250ms desktop), get a 30-day cookie, and pass through. Replaces
|
||||
# the global ai-bot-block forwardAuth for this site.
|
||||
module "anubis" {
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "blog"
|
||||
namespace = kubernetes_namespace.website.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.blog.metadata[0].name}.${kubernetes_namespace.website.metadata[0].name}.svc.cluster.local"
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "blog"
|
||||
namespace = kubernetes_namespace.website.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.blog.metadata[0].name}.${kubernetes_namespace.website.metadata[0].name}.svc.cluster.local"
|
||||
shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/10"
|
||||
}
|
||||
|
||||
module "ingress" {
|
||||
|
|
|
|||
|
|
@ -105,10 +105,11 @@ resource "kubernetes_service" "cyberchef" {
|
|||
|
||||
|
||||
module "anubis" {
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "cc"
|
||||
namespace = kubernetes_namespace.cyberchef.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.cyberchef.metadata[0].name}.${kubernetes_namespace.cyberchef.metadata[0].name}.svc.cluster.local"
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "cc"
|
||||
namespace = kubernetes_namespace.cyberchef.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.cyberchef.metadata[0].name}.${kubernetes_namespace.cyberchef.metadata[0].name}.svc.cluster.local"
|
||||
shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/5"
|
||||
}
|
||||
|
||||
module "ingress" {
|
||||
|
|
|
|||
|
|
@ -244,11 +244,12 @@ module "tls_secret" {
|
|||
# (which load before any user has a chance to solve PoW), CHALLENGE
|
||||
# everything else — the HTML pages.
|
||||
module "anubis" {
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "f1"
|
||||
namespace = kubernetes_namespace.f1-stream.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.f1-stream.metadata[0].name}.${kubernetes_namespace.f1-stream.metadata[0].name}.svc.cluster.local"
|
||||
policy_yaml = <<-EOT
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "f1"
|
||||
namespace = kubernetes_namespace.f1-stream.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.f1-stream.metadata[0].name}.${kubernetes_namespace.f1-stream.metadata[0].name}.svc.cluster.local"
|
||||
shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/6"
|
||||
policy_yaml = <<-EOT
|
||||
bots:
|
||||
- import: (data)/bots/_deny-pathological.yaml
|
||||
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||
|
|
|
|||
|
|
@ -138,10 +138,11 @@ resource "kubernetes_service" "cache_proxy" {
|
|||
}
|
||||
|
||||
module "anubis" {
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "homepage"
|
||||
namespace = kubernetes_namespace.homepage.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.cache_proxy.metadata[0].name}.${kubernetes_namespace.homepage.metadata[0].name}.svc.cluster.local"
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "homepage"
|
||||
namespace = kubernetes_namespace.homepage.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.cache_proxy.metadata[0].name}.${kubernetes_namespace.homepage.metadata[0].name}.svc.cluster.local"
|
||||
shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/9"
|
||||
}
|
||||
|
||||
module "ingress" {
|
||||
|
|
|
|||
|
|
@ -85,10 +85,11 @@ resource "kubernetes_service" "jsoncrack" {
|
|||
}
|
||||
|
||||
module "anubis" {
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "json"
|
||||
namespace = kubernetes_namespace.jsoncrack.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.jsoncrack.metadata[0].name}.${kubernetes_namespace.jsoncrack.metadata[0].name}.svc.cluster.local"
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "json"
|
||||
namespace = kubernetes_namespace.jsoncrack.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.jsoncrack.metadata[0].name}.${kubernetes_namespace.jsoncrack.metadata[0].name}.svc.cluster.local"
|
||||
shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/7"
|
||||
}
|
||||
|
||||
module "ingress" {
|
||||
|
|
|
|||
|
|
@ -104,10 +104,11 @@ resource "kubernetes_service" "kms-web-page" {
|
|||
}
|
||||
|
||||
module "anubis" {
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "kms"
|
||||
namespace = kubernetes_namespace.kms.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.kms-web-page.metadata[0].name}.${kubernetes_namespace.kms.metadata[0].name}.svc.cluster.local"
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "kms"
|
||||
namespace = kubernetes_namespace.kms.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.kms-web-page.metadata[0].name}.${kubernetes_namespace.kms.metadata[0].name}.svc.cluster.local"
|
||||
shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/8"
|
||||
}
|
||||
|
||||
module "ingress" {
|
||||
|
|
|
|||
|
|
@ -364,10 +364,11 @@ resource "kubernetes_service" "realestate-crawler-api" {
|
|||
# Anubis fronts the UI ingress only; the /api ingress (`module "ingress-api"`)
|
||||
# stays direct so XHRs from the UI bypass the challenge.
|
||||
module "anubis" {
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "wrongmove"
|
||||
namespace = kubernetes_namespace.realestate-crawler.metadata[0].name
|
||||
target_url = "http://realestate-crawler-ui.${kubernetes_namespace.realestate-crawler.metadata[0].name}.svc.cluster.local"
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "wrongmove"
|
||||
namespace = kubernetes_namespace.realestate-crawler.metadata[0].name
|
||||
target_url = "http://realestate-crawler-ui.${kubernetes_namespace.realestate-crawler.metadata[0].name}.svc.cluster.local"
|
||||
shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/12"
|
||||
}
|
||||
|
||||
module "ingress" {
|
||||
|
|
@ -453,13 +454,15 @@ resource "kubernetes_deployment" "realestate-crawler-celery" {
|
|||
image = "viktorbarzin/realestatecrawler:latest"
|
||||
image_pull_policy = "Always"
|
||||
command = ["python", "-m", "celery", "-A", "celery_app", "worker", "--loglevel=info", "--pool=threads"]
|
||||
# 512Mi OOMed during full London RENT 1-2 bed scrape (~76k existing IDs
|
||||
# + 10k fetched into memory at concurrency=8 threads). Bumped to 1Gi.
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "15m"
|
||||
memory = "512Mi"
|
||||
memory = "1Gi"
|
||||
}
|
||||
limits = {
|
||||
memory = "512Mi"
|
||||
memory = "1Gi"
|
||||
}
|
||||
}
|
||||
port {
|
||||
|
|
|
|||
|
|
@ -103,10 +103,11 @@ resource "kubernetes_service" "travel-blog" {
|
|||
}
|
||||
|
||||
module "anubis" {
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "travel"
|
||||
namespace = kubernetes_namespace.travel-blog.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.travel-blog.metadata[0].name}.${kubernetes_namespace.travel-blog.metadata[0].name}.svc.cluster.local"
|
||||
source = "../../modules/kubernetes/anubis_instance"
|
||||
name = "travel"
|
||||
namespace = kubernetes_namespace.travel-blog.metadata[0].name
|
||||
target_url = "http://${kubernetes_service.travel-blog.metadata[0].name}.${kubernetes_namespace.travel-blog.metadata[0].name}.svc.cluster.local"
|
||||
shared_store_url = "redis://redis-master.redis.svc.cluster.local:6379/11"
|
||||
}
|
||||
|
||||
module "ingress" {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue