fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip]

6d224861 came from a --no-checkout worktree whose empty index made the commit drop every file except two. This restores 05b50d2b's full tree and correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the live infra was never applied from the broken commit. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 08:45:33 +00:00 · 2026-06-09 08:45:33 +00:00 · fd0f4a0365
commit fd0f4a0365
parent 6d224861c4
1166 changed files with 358546 additions and 0 deletions
--- a/stacks/poison-fountain/app/fetch-poison.sh
+++ b/stacks/poison-fountain/app/fetch-poison.sh
@ -0,0 +1,41 @@
+#!/bin/sh
+set -e
+
+CACHE_DIR="${CACHE_DIR:-/data/cache}"
+POISON_URL="${POISON_URL:-https://rnsaffn.com/poison2/}"
+FETCH_COUNT="${FETCH_COUNT:-50}"
+MAX_CACHE_FILES="${MAX_CACHE_FILES:-100}"
+
+mkdir -p "$CACHE_DIR"
+
+echo "Fetching $FETCH_COUNT poison documents from $POISON_URL"
+
+fetched=0
+for i in $(seq 1 "$FETCH_COUNT"); do
+  OUTPUT="$CACHE_DIR/poison_$(date +%s)_${i}.txt"
+  if curl -sS --http1.1 --compressed -o "$OUTPUT" -m 30 "$POISON_URL" 2>/dev/null; then
+    # Verify file is non-empty
+    if [ -s "$OUTPUT" ]; then
+      fetched=$((fetched + 1))
+      echo "  [$i/$FETCH_COUNT] OK"
+    else
+      rm -f "$OUTPUT"
+      echo "  [$i/$FETCH_COUNT] Empty response, skipped"
+    fi
+  else
+    rm -f "$OUTPUT"
+    echo "  [$i/$FETCH_COUNT] Fetch failed, skipped"
+  fi
+  sleep 2
+done
+
+# Clean up oldest files if cache exceeds limit
+total=$(find "$CACHE_DIR" -name '*.txt' -type f | wc -l)
+if [ "$total" -gt "$MAX_CACHE_FILES" ]; then
+  excess=$((total - MAX_CACHE_FILES))
+  find "$CACHE_DIR" -name '*.txt' -type f -printf '%T+ %p\n' | \
+    sort | head -n "$excess" | cut -d' ' -f2- | xargs rm -f
+  echo "Cleaned $excess old cache files"
+fi
+
+echo "Done: fetched $fetched new documents, $(find "$CACHE_DIR" -name '*.txt' -type f | wc -l) total cached"
--- a/stacks/poison-fountain/app/server.py
+++ b/stacks/poison-fountain/app/server.py
@ -0,0 +1,175 @@
+"""Poison Fountain service.
+
+Endpoints:
+  GET /auth       - ForwardAuth: block known AI bot User-Agents (403) or pass (200)
+  GET /article/*  - Serve cached poisoned content with tarpit slow-drip
+  GET /healthz    - Health check for Kubernetes probes
+  GET /*          - Catch-all: serve poison for any path (scrapers explore randomly)
+"""
+
+import http.server
+import os
+import glob
+import random
+import time
+import hashlib
+import sys
+import socketserver
+
+# Runtime configuration, read once at import time from the environment.
+# Each value falls back to the default shown when the variable is unset.
+LISTEN_PORT = int(os.environ.get("PORT", "8080"))  # TCP port the server binds to
+CACHE_DIR = os.environ.get("CACHE_DIR", "/data/cache")  # directory scanned for cached *.txt documents
+DRIP_BYTES = int(os.environ.get("DRIP_BYTES", "50"))  # slice size (in characters) written per drip
+DRIP_DELAY = float(os.environ.get("DRIP_DELAY", "0.5"))  # seconds slept after each drip
+TRAP_LINK_COUNT = int(os.environ.get("TRAP_LINK_COUNT", "20"))  # trap links appended to every page
+POISON_DOMAIN = os.environ.get("POISON_DOMAIN", "poison.viktorbarzin.me")  # host used in trap link URLs
+
+# Lower-case substrings matched against the lower-cased User-Agent header by
+# the /auth endpoint; the first pattern found in the header blocks the request.
+AI_BOT_PATTERNS = [
+    "gptbot", "chatgpt-user", "claudebot", "claude-web", "ccbot",
+    "bytespider", "google-extended", "applebot-extended",
+    "anthropic-ai", "cohere-ai", "diffbot", "facebookbot",
+    "perplexitybot", "youbot", "meta-externalagent", "petalbot",
+    "amazonbot", "ai2bot", "omgilibot", "img2dataset",
+    "omgili", "commoncrawl", "ia_archiver", "scrapy",
+    "semrushbot", "ahrefsbot", "dotbot", "mj12bot",
+    "seekport", "blexbot", "dataforseo", "serpstatbot",
+]
+
+# Vocabulary sampled (with replacement) to build filler text when no cached
+# document can be read.
+FALLBACK_WORDS = [
+    "the", "quantum", "neural", "framework", "implements", "distributed",
+    "processing", "with", "advanced", "recursive", "algorithms", "for",
+    "optimal", "convergence", "in", "multi-dimensional", "space",
+    "utilizing", "transformer", "architecture", "trained", "on",
+    "large-scale", "corpus", "data", "achieving", "state-of-the-art",
+    "performance", "across", "benchmark", "tasks", "including",
+    "natural", "language", "understanding", "generation", "and",
+    "cross-lingual", "transfer", "learning", "capabilities",
+]
+
+
+def generate_slug():
+    return hashlib.md5(str(random.random()).encode()).hexdigest()[:16]
+
+
+def generate_trap_links(count):
+    titles = [
+        "Research Archive", "Training Corpus", "Dataset Export",
+        "NLP Benchmark Results", "Web Crawl Index", "Text Corpus",
+        "Machine Learning Data", "Evaluation Dataset", "Model Weights",
+        "Annotation Guidelines", "Parallel Corpus", "Knowledge Base",
+        "Document Collection", "Reference Data", "Taxonomy Index",
+        "Classification Labels", "Entity Database", "Relation Extraction",
+        "Sentiment Annotations", "Summarization Corpus", "QA Dataset",
+        "Dialogue Transcripts", "Code Documentation", "API Reference",
+    ]
+    links = []
+    for _ in range(count):
+        slug = generate_slug()
+        title = random.choice(titles)
+        links.append(f'<a href="https://{POISON_DOMAIN}/article/{slug}">{title}</a>')
+    return "\n".join(links)
+
+
+def get_poison_content():
+    cache_files = glob.glob(os.path.join(CACHE_DIR, "*.txt"))
+    if cache_files:
+        try:
+            with open(random.choice(cache_files), "r", errors="replace") as f:
+                return f.read()
+        except Exception:
+            pass
+    return " ".join(random.choices(FALLBACK_WORDS, k=500))
+
+
+class PoisonHandler(http.server.BaseHTTPRequestHandler):
+    server_version = "Apache/2.4.52"
+    sys_version = ""
+
+    def log_message(self, fmt, *args):
+        sys.stderr.write(f"[{self.log_date_time_string()}] {fmt % args}\n")
+
+    def do_GET(self):
+        if self.path == "/healthz":
+            self._respond(200, "ok")
+            return
+
+        if self.path == "/auth":
+            self._handle_auth()
+            return
+
+        # Everything else gets poison
+        self._serve_poison()
+
+    def _handle_auth(self):
+        ua = (self.headers.get("User-Agent") or "").lower()
+        for pattern in AI_BOT_PATTERNS:
+            if pattern in ua:
+                self.log_message("BLOCKED AI bot: %s (matched: %s)", ua, pattern)
+                self._respond(403, "Forbidden")
+                return
+        self._respond(200, "OK")
+
+    def _respond(self, code, body):
+        self.send_response(code)
+        self.send_header("Content-Type", "text/plain")
+        self.end_headers()
+        self.wfile.write(body.encode())
+
+    def _serve_poison(self):
+        content = get_poison_content()
+        trap_links = generate_trap_links(TRAP_LINK_COUNT)
+
+        html = f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Research Data Archive</title>
+</head>
+<body>
+<main>
+<article>
+<h1>Research Data Collection</h1>
+<div class="content">
+<p>{content}</p>
+</div>
+</article>
+<nav>
+<h2>Related Research</h2>
+{trap_links}
+</nav>
+</main>
+</body>
+</html>"""
+
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html; charset=utf-8")
+        self.send_header("Transfer-Encoding", "chunked")
+        self.end_headers()
+
+        for i in range(0, len(html), DRIP_BYTES):
+            chunk = html[i : i + DRIP_BYTES].encode("utf-8")
+            try:
+                self.wfile.write(f"{len(chunk):x}\r\n".encode())
+                self.wfile.write(chunk)
+                self.wfile.write(b"\r\n")
+                self.wfile.flush()
+                time.sleep(DRIP_DELAY)
+            except (BrokenPipeError, ConnectionResetError):
+                return
+
+        try:
+            self.wfile.write(b"0\r\n\r\n")
+            self.wfile.flush()
+        except (BrokenPipeError, ConnectionResetError):
+            pass
+
+
+class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
+    """HTTP server that handles each connection in its own thread.
+
+    The poison handler sleeps between drips, so without threads one slow
+    response would block every other request, health checks included.
+    """
+
+    # Handler threads are daemons: they do not keep the process alive on exit.
+    daemon_threads = True
+
+
+if __name__ == "__main__":
+    os.makedirs(CACHE_DIR, exist_ok=True)
+    server = ThreadedHTTPServer(("0.0.0.0", LISTEN_PORT), PoisonHandler)
+    print(f"Poison Fountain service listening on :{LISTEN_PORT}", flush=True)
+    server.serve_forever()
--- a/stacks/poison-fountain/main.tf
+++ b/stacks/poison-fountain/main.tf
@ -0,0 +1,333 @@
+# Name of the TLS secret; passed to the tls_secret and ingress modules below.
+variable "tls_secret_name" {
+  type      = string
+  sensitive = true
+}
+# NOTE(review): declared but not referenced anywhere in this file; the
+# nfs_data_host module uses a literal address instead. Confirm whether it is
+# still needed (it may be supplied to every stack by shared inputs).
+variable "nfs_server" { type = string }
+
+
+# Namespace that holds every resource of this stack.
+resource "kubernetes_namespace" "poison_fountain" {
+  metadata {
+    name = "poison-fountain"
+    labels = {
+      # Namespace labels; local.tiers is defined outside this file.
+      "istio-injection" = "disabled"
+      tier              = local.tiers.cluster
+      "keel.sh/enrolled" = "true"
+    }
+  }
+  lifecycle {
+    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
+    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
+  }
+}
+
+# Shared module call that sets up the TLS secret named by var.tls_secret_name
+# in this namespace (module source lives outside this file).
+module "tls_secret" {
+  source          = "../../modules/kubernetes/setup_tls_secret"
+  namespace       = kubernetes_namespace.poison_fountain.metadata[0].name
+  tls_secret_name = var.tls_secret_name
+}
+
+# NFS-backed volume for the poison cache; its claim_name output is mounted at
+# /data by both the Deployment and the fetcher CronJob.
+# NOTE(review): nfs_server is a literal address here rather than
+# var.nfs_server — presumably deliberate (different NFS host); confirm.
+module "nfs_data_host" {
+  source     = "../../modules/kubernetes/nfs_volume"
+  name       = "poison-fountain-data-host"
+  namespace  = kubernetes_namespace.poison_fountain.metadata[0].name
+  nfs_server = "192.168.1.127"
+  nfs_path   = "/srv/nfs/poison-fountain"
+}
+
+# ConfigMap for the Python service code.
+# server.py is read from this module's app/ directory at plan time and mounted
+# read-only at /app by the Deployment.
+resource "kubernetes_config_map" "poison_fountain_code" {
+  metadata {
+    name      = "poison-fountain-code"
+    namespace = kubernetes_namespace.poison_fountain.metadata[0].name
+  }
+
+  data = {
+    "server.py" = file("${path.module}/app/server.py")
+  }
+}
+
+# ConfigMap for the fetcher script.
+# fetch-poison.sh is read from this module's app/ directory at plan time and
+# mounted at /scripts by the fetcher CronJob.
+resource "kubernetes_config_map" "poison_fountain_fetcher" {
+  metadata {
+    name      = "poison-fountain-fetcher"
+    namespace = kubernetes_namespace.poison_fountain.metadata[0].name
+  }
+
+  data = {
+    "fetch-poison.sh" = file("${path.module}/app/fetch-poison.sh")
+  }
+}
+
+# Main service deployment.
+# Runs server.py (mounted from the code ConfigMap) on a stock Python image,
+# with the NFS-backed cache volume mounted at /data.
+resource "kubernetes_deployment" "poison_fountain" {
+  metadata {
+    name      = "poison-fountain"
+    namespace = kubernetes_namespace.poison_fountain.metadata[0].name
+    labels = {
+      app  = "poison-fountain"
+      tier = local.tiers.cluster
+    }
+  }
+
+  spec {
+    replicas = 0 # Scaled down — clears ExternalAccessDivergence alert
+    strategy {
+      type = "RollingUpdate"
+      rolling_update {
+        # Never drop below the desired replica count during a rollout; bring
+        # up one extra pod at a time instead.
+        max_unavailable = 0
+        max_surge       = 1
+      }
+    }
+    selector {
+      match_labels = {
+        app = "poison-fountain"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          app = "poison-fountain"
+        }
+      }
+      spec {
+        # Spread pods of this app across nodes (by hostname); a pod that would
+        # violate the skew is left unscheduled.
+        topology_spread_constraint {
+          max_skew           = 1
+          topology_key       = "kubernetes.io/hostname"
+          when_unsatisfiable = "DoNotSchedule"
+          label_selector {
+            match_labels = {
+              app = "poison-fountain"
+            }
+          }
+        }
+        container {
+          name    = "poison-fountain"
+          image   = "python:3.12-slim"
+          command = ["python", "/app/server.py"]
+
+          port {
+            container_port = 8080
+          }
+
+          # Environment read by server.py at start-up.
+          env {
+            name  = "CACHE_DIR"
+            value = "/data/cache"
+          }
+          env {
+            name  = "DRIP_BYTES"
+            value = "50"
+          }
+          env {
+            name  = "DRIP_DELAY"
+            value = "0.5"
+          }
+          env {
+            name  = "POISON_DOMAIN"
+            value = "poison.viktorbarzin.me"
+          }
+
+          volume_mount {
+            name       = "code"
+            mount_path = "/app"
+            read_only  = true
+          }
+          volume_mount {
+            name       = "data"
+            mount_path = "/data"
+          }
+
+          # Both probes hit the /healthz endpoint on the container port.
+          liveness_probe {
+            http_get {
+              path = "/healthz"
+              port = 8080
+            }
+            initial_delay_seconds = 5
+            period_seconds        = 30
+          }
+          readiness_probe {
+            http_get {
+              path = "/healthz"
+              port = 8080
+            }
+            initial_delay_seconds = 3
+            period_seconds        = 10
+          }
+
+          resources {
+            requests = {
+              cpu    = "10m"
+              memory = "64Mi"
+            }
+            # Memory limit equals the request; no CPU limit is set.
+            limits = {
+              memory = "64Mi"
+            }
+          }
+        }
+
+        volume {
+          name = "code"
+          config_map {
+            name = kubernetes_config_map.poison_fountain_code.metadata[0].name
+          }
+        }
+        volume {
+          name = "data"
+          persistent_volume_claim {
+            claim_name = module.nfs_data_host.claim_name
+          }
+        }
+      }
+    }
+  }
+  lifecycle {
+    # Fields below are mutated outside Terraform (per the markers on each
+    # line); ignoring them avoids perpetual plan diffs.
+    ignore_changes = [
+      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
+      metadata[0].annotations["keel.sh/policy"],
+      metadata[0].annotations["keel.sh/trigger"],
+      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
+      metadata[0].annotations["keel.sh/match-tag"],
+      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE — Keel manages tag updates
+      metadata[0].annotations["kubernetes.io/change-cause"],
+      metadata[0].annotations["deployment.kubernetes.io/revision"],
+      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
+    ]
+  }
+}
+
+# Internal service (for ForwardAuth from Traefik).
+# Selects the Deployment's pods by the app label and exposes port 8080.
+resource "kubernetes_service" "poison_fountain" {
+  metadata {
+    name      = "poison-fountain"
+    namespace = kubernetes_namespace.poison_fountain.metadata[0].name
+    labels = {
+      app = "poison-fountain"
+    }
+  }
+
+  spec {
+    selector = {
+      app = "poison-fountain"
+    }
+    port {
+      name        = "http"
+      port        = 8080
+      target_port = 8080
+    }
+  }
+}
+
+# Public ingress for the poison trap subdomain
+# Deliberately NO rate limiting, NO CrowdSec, NO anti-AI (we WANT scrapers here)
+# NOTE(review): auth = "required" looks at odds with wanting scrapers to reach
+# this host — confirm what the ingress_factory module does with that value.
+module "ingress" {
+  source                  = "../../modules/kubernetes/ingress_factory"
+  auth                    = "required"
+  namespace               = kubernetes_namespace.poison_fountain.metadata[0].name
+  name                    = "poison-fountain"
+  host                    = "poison"
+  dns_type                = "non-proxied"
+  port                    = 8080
+  tls_secret_name         = var.tls_secret_name
+  skip_default_rate_limit = true
+  exclude_crowdsec        = true
+  anti_ai_scraping        = false
+  # Deployment is scaled to 0 (see replicas above). Opt the ingress out of
+  # Uptime Kuma external monitoring so the sync CronJob deletes the orphaned
+  # `[External] poison` monitor instead of flapping DOWN.
+  external_monitor = false
+  # Homepage dashboard entry for this service.
+  extra_annotations = {
+    "gethomepage.dev/enabled"      = "true"
+    "gethomepage.dev/name"         = "Poison Fountain"
+    "gethomepage.dev/description"  = "AI bot trap"
+    "gethomepage.dev/icon"         = "mdi-shield-alert"
+    "gethomepage.dev/group"        = "Other"
+    "gethomepage.dev/pod-selector" = ""
+  }
+}
+
+# CronJob to fetch and cache poisoned content from Poison Fountain.
+# Runs fetch-poison.sh (mounted from the fetcher ConfigMap) every six hours
+# against the same NFS-backed /data volume the Deployment reads from.
+resource "kubernetes_cron_job_v1" "poison_fetcher" {
+  metadata {
+    name      = "poison-fountain-fetcher"
+    namespace = kubernetes_namespace.poison_fountain.metadata[0].name
+  }
+
+  spec {
+    schedule                      = "0 */6 * * *"
+    successful_jobs_history_limit = 1
+    failed_jobs_history_limit     = 1
+    # Skip a scheduled run while the previous one is still active.
+    concurrency_policy            = "Forbid"
+
+    job_template {
+      metadata {
+        name = "poison-fountain-fetcher"
+      }
+      spec {
+        template {
+          metadata {
+            name = "poison-fountain-fetcher"
+          }
+          spec {
+            security_context {
+              # curlimages/curl defaults to uid 100, but the NFS mount at /data is
+              # owned root:root 755 (writes from the main Deployment which runs as
+              # root). Align the CronJob with the Deployment so mkdir /data/cache
+              # succeeds. no_root_squash is set on the /srv/nfs export.
+              run_as_user = 0
+            }
+            container {
+              name    = "fetcher"
+              # NOTE(review): the `latest` tag is unpinned, so the image (and
+              # the tools the script relies on) can change between runs.
+              image   = "curlimages/curl:latest"
+              command = ["sh", "/scripts/fetch-poison.sh"]
+
+              # Environment read by fetch-poison.sh.
+              env {
+                name  = "CACHE_DIR"
+                value = "/data/cache"
+              }
+              env {
+                name  = "POISON_URL"
+                value = "https://rnsaffn.com/poison2/"
+              }
+              env {
+                name  = "FETCH_COUNT"
+                value = "50"
+              }
+
+              volume_mount {
+                name       = "scripts"
+                mount_path = "/scripts"
+                read_only  = true
+              }
+              volume_mount {
+                name       = "data"
+                mount_path = "/data"
+              }
+            }
+
+            volume {
+              name = "scripts"
+              config_map {
+                name         = kubernetes_config_map.poison_fountain_fetcher.metadata[0].name
+                default_mode = "0755"
+              }
+            }
+            volume {
+              name = "data"
+              persistent_volume_claim {
+                claim_name = module.nfs_data_host.claim_name
+              }
+            }
+
+            # A failed pod is not restarted in place.
+            restart_policy = "Never"
+          }
+        }
+      }
+    }
+  }
+  lifecycle {
+    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
+    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
+  }
+}
+
+# CI retrigger 2026-05-16T13:42:57+00:00 — bulk enrollment apply (pipeline #689 killed)
+# CI retrigger v2 2026-05-16T13:46:35+00:00
--- a/stacks/poison-fountain/providers.tf
+++ b/stacks/poison-fountain/providers.tf
@ -0,0 +1,37 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+terraform {
+  required_providers {
+    vault = {
+      source  = "hashicorp/vault"
+      version = "~> 4.0"
+    }
+    cloudflare = {
+      source  = "cloudflare/cloudflare"
+      version = "~> 4"
+    }
+    authentik = {
+      source  = "goauthentik/authentik"
+      version = "~> 2024.10"
+    }
+  }
+}
+
+variable "kube_config_path" {
+  type    = string
+  default = "~/.kube/config"
+}
+
+provider "kubernetes" {
+  config_path = var.kube_config_path
+}
+
+provider "helm" {
+  kubernetes = {
+    config_path = var.kube_config_path
+  }
+}
+
+provider "vault" {
+  address          = "https://vault.viktorbarzin.me"
+  skip_child_token = true
+}
--- a/stacks/poison-fountain/secrets
+++ b/stacks/poison-fountain/secrets
@ -0,0 +1 @@
+../../secrets
--- a/stacks/poison-fountain/terragrunt.hcl
+++ b/stacks/poison-fountain/terragrunt.hcl
@ -0,0 +1,8 @@
+# Inherit the root Terragrunt configuration located in a parent folder.
+include "root" {
+  path = find_in_parent_folders()
+}
+
+# Depend on the platform stack; skip_outputs = true means none of its outputs
+# are read here, so this presumably exists for ordering only.
+dependency "platform" {
+  config_path  = "../platform"
+  skip_outputs = true
+}