fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip]

6d224861 came from a --no-checkout worktree whose empty index made the
commit drop every file except two. This restores 05b50d2b's full tree and
correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su
entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the
live infra was never applied from the broken commit.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-09 08:45:33 +00:00
parent 6d224861c4
commit fd0f4a0365
1166 changed files with 358546 additions and 0 deletions

View file

@ -0,0 +1,41 @@
#!/bin/sh
# Fetch a batch of poison documents into the cache directory, then trim the
# cache back to MAX_CACHE_FILES by deleting the oldest files.
#
# Environment (all optional, defaults below):
#   CACHE_DIR        directory the documents are written to
#   POISON_URL       URL requested once per document
#   FETCH_COUNT      number of fetch attempts per run
#   MAX_CACHE_FILES  number of *.txt files kept after a run
set -e

CACHE_DIR="${CACHE_DIR:-/data/cache}"
POISON_URL="${POISON_URL:-https://rnsaffn.com/poison2/}"
FETCH_COUNT="${FETCH_COUNT:-50}"
MAX_CACHE_FILES="${MAX_CACHE_FILES:-100}"

mkdir -p "$CACHE_DIR"

echo "Fetching $FETCH_COUNT poison documents from $POISON_URL"
fetched=0
for i in $(seq 1 "$FETCH_COUNT"); do
  OUTPUT="$CACHE_DIR/poison_$(date +%s)_${i}.txt"
  # -f: treat HTTP status >= 400 as a failure so error pages are never cached
  # as documents. -m 30: give up on a single attempt after 30 seconds.
  if curl -fsS --http1.1 --compressed -o "$OUTPUT" -m 30 "$POISON_URL" 2>/dev/null; then
    # Keep the file only when the response body was non-empty
    if [ -s "$OUTPUT" ]; then
      fetched=$((fetched + 1))
      echo " [$i/$FETCH_COUNT] OK"
    else
      rm -f "$OUTPUT"
      echo " [$i/$FETCH_COUNT] Empty response, skipped"
    fi
  else
    # A failed transfer may leave a partial file behind; remove it
    rm -f "$OUTPUT"
    echo " [$i/$FETCH_COUNT] Fetch failed, skipped"
  fi
  # Pause between requests
  sleep 2
done

# Clean up oldest files if cache exceeds limit
total=$(find "$CACHE_DIR" -name '*.txt' -type f | wc -l)
if [ "$total" -gt "$MAX_CACHE_FILES" ]; then
  excess=$((total - MAX_CACHE_FILES))
  # `find -printf` is a GNU extension and is missing from BusyBox find, so it
  # cannot be relied on in minimal images. `ls -1tr` is portable and lists the
  # files oldest-first by modification time; every file this script creates
  # lives directly in $CACHE_DIR and has a name without whitespace.
  ls -1tr "$CACHE_DIR"/*.txt | head -n "$excess" | while IFS= read -r stale; do
    rm -f -- "$stale"
  done
  echo "Cleaned $excess old cache files"
fi

echo "Done: fetched $fetched new documents, $(find "$CACHE_DIR" -name '*.txt' -type f | wc -l) total cached"

View file

@ -0,0 +1,175 @@
"""Poison Fountain service.
Endpoints:
GET /auth - ForwardAuth: block known AI bot User-Agents (403) or pass (200)
GET /article/* - Serve cached poisoned content with tarpit slow-drip
GET /healthz - Health check for Kubernetes probes
GET /* - Catch-all: serve poison for any path (scrapers explore randomly)
"""
import http.server
import os
import glob
import random
import time
import hashlib
import sys
import socketserver
# --- Runtime configuration: each value can be overridden via the environment ---
LISTEN_PORT = int(os.getenv("PORT", "8080"))
CACHE_DIR = os.getenv("CACHE_DIR", "/data/cache")
# Characters sent per chunk and seconds slept between chunks when serving a page
DRIP_BYTES = int(os.getenv("DRIP_BYTES", "50"))
DRIP_DELAY = float(os.getenv("DRIP_DELAY", "0.5"))
# Number of generated links appended to every page
TRAP_LINK_COUNT = int(os.getenv("TRAP_LINK_COUNT", "20"))
# Host name used when building the generated link URLs
POISON_DOMAIN = os.getenv("POISON_DOMAIN", "poison.viktorbarzin.me")

# Lower-case substrings looked for in the request's User-Agent by the /auth
# check. Order matters for logging only: the first matching entry is reported.
AI_BOT_PATTERNS = [
    "gptbot",
    "chatgpt-user",
    "claudebot",
    "claude-web",
    "ccbot",
    "bytespider",
    "google-extended",
    "applebot-extended",
    "anthropic-ai",
    "cohere-ai",
    "diffbot",
    "facebookbot",
    "perplexitybot",
    "youbot",
    "meta-externalagent",
    "petalbot",
    "amazonbot",
    "ai2bot",
    "omgilibot",
    "img2dataset",
    "omgili",
    "commoncrawl",
    "ia_archiver",
    "scrapy",
    "semrushbot",
    "ahrefsbot",
    "dotbot",
    "mj12bot",
    "seekport",
    "blexbot",
    "dataforseo",
    "serpstatbot",
]

# Vocabulary sampled to build filler text when no cached document can be used
FALLBACK_WORDS = [
    "the",
    "quantum",
    "neural",
    "framework",
    "implements",
    "distributed",
    "processing",
    "with",
    "advanced",
    "recursive",
    "algorithms",
    "for",
    "optimal",
    "convergence",
    "in",
    "multi-dimensional",
    "space",
    "utilizing",
    "transformer",
    "architecture",
    "trained",
    "on",
    "large-scale",
    "corpus",
    "data",
    "achieving",
    "state-of-the-art",
    "performance",
    "across",
    "benchmark",
    "tasks",
    "including",
    "natural",
    "language",
    "understanding",
    "generation",
    "and",
    "cross-lingual",
    "transfer",
    "learning",
    "capabilities",
]
def generate_slug():
    """Return a 16-character lowercase hex string derived from a random float."""
    seed_text = str(random.random())
    digest = hashlib.md5(seed_text.encode()).hexdigest()
    return digest[:16]
def generate_trap_links(count):
    """Build ``count`` HTML anchors pointing at random article URLs.

    Each anchor links to ``https://{POISON_DOMAIN}/article/<slug>`` with a
    fresh slug and a randomly chosen title. The anchors are returned as one
    newline-separated string (empty when ``count`` is 0).
    """
    link_titles = (
        "Research Archive",
        "Training Corpus",
        "Dataset Export",
        "NLP Benchmark Results",
        "Web Crawl Index",
        "Text Corpus",
        "Machine Learning Data",
        "Evaluation Dataset",
        "Model Weights",
        "Annotation Guidelines",
        "Parallel Corpus",
        "Knowledge Base",
        "Document Collection",
        "Reference Data",
        "Taxonomy Index",
        "Classification Labels",
        "Entity Database",
        "Relation Extraction",
        "Sentiment Annotations",
        "Summarization Corpus",
        "QA Dataset",
        "Dialogue Transcripts",
        "Code Documentation",
        "API Reference",
    )
    # The slug is evaluated before the title inside the f-string, so the
    # random number generator is consumed in the same order for every link.
    anchors = [
        f'<a href="https://{POISON_DOMAIN}/article/{generate_slug()}">'
        f"{random.choice(link_titles)}</a>"
        for _ in range(count)
    ]
    return "\n".join(anchors)
def get_poison_content():
    """Return the text of one randomly chosen cached document.

    Falls back to 500 words sampled from ``FALLBACK_WORDS`` when the cache
    directory holds no ``*.txt`` file, when the chosen file cannot be read
    (for example it was deleted after being listed), or when it contains
    nothing but whitespace.

    Returns:
        str: document text, or the generated filler text.
    """
    cache_files = glob.glob(os.path.join(CACHE_DIR, "*.txt"))
    if cache_files:
        chosen = random.choice(cache_files)
        try:
            # Explicit UTF-8 keeps decoding independent of the process locale;
            # errors="replace" means undecodable bytes never raise.
            with open(chosen, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError as exc:
            # Best effort: report the problem and serve filler text instead.
            sys.stderr.write(f"Could not read cache file {chosen}: {exc}\n")
            content = ""
        # An empty or whitespace-only file would produce an empty page.
        if content.strip():
            return content
    return " ".join(random.choices(FALLBACK_WORDS, k=500))
class PoisonHandler(http.server.BaseHTTPRequestHandler):
    """Request handler for the health check, the bot auth check and the tarpit.

    Routing (GET only, query string ignored):
      /healthz       -> 200 "ok"
      /auth          -> 403 if the User-Agent contains an AI_BOT_PATTERNS
                        entry, otherwise 200
      anything else  -> an HTML page sent slowly in small chunks
    """

    # Chunked transfer encoding is only defined for HTTP/1.1, so the status
    # line must not advertise HTTP/1.0 (the base class default). Every
    # response also sends "Connection: close", which keeps the original
    # one-request-per-connection behaviour.
    protocol_version = "HTTP/1.1"
    server_version = "Apache/2.4.52"
    sys_version = ""

    def log_message(self, fmt, *args):
        """Write one timestamped log line to stderr."""
        sys.stderr.write(f"[{self.log_date_time_string()}] {fmt % args}\n")

    def do_GET(self):
        """Dispatch a GET request based on its path."""
        # Route on the path alone so "/healthz?x=1" still hits the health check.
        route = self.path.split("?", 1)[0]
        if route == "/healthz":
            self._respond(200, "ok")
            return
        if route == "/auth":
            self._handle_auth()
            return
        # Everything else gets poison
        self._serve_poison()

    def _handle_auth(self):
        """Answer 403 for a User-Agent matching a known bot pattern, else 200."""
        ua = (self.headers.get("User-Agent") or "").lower()
        for pattern in AI_BOT_PATTERNS:
            if pattern in ua:
                self.log_message("BLOCKED AI bot: %s (matched: %s)", ua, pattern)
                self._respond(403, "Forbidden")
                return
        self._respond(200, "OK")

    def _respond(self, code, body):
        """Send a short plain-text response with status ``code``."""
        payload = body.encode()
        self.send_response(code)
        self.send_header("Content-Type", "text/plain")
        # HTTP/1.1 responses need explicit framing.
        self.send_header("Content-Length", str(len(payload)))
        self.send_header("Connection", "close")
        self.end_headers()
        self.wfile.write(payload)

    def _serve_poison(self):
        """Send a generated HTML page as a slow sequence of small chunks."""
        content = get_poison_content()
        trap_links = generate_trap_links(TRAP_LINK_COUNT)
        html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Research Data Archive</title>
</head>
<body>
<main>
<article>
<h1>Research Data Collection</h1>
<div class="content">
<p>{content}</p>
</div>
</article>
<nav>
<h2>Related Research</h2>
{trap_links}
</nav>
</main>
</body>
</html>"""
        self.send_response(200)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Transfer-Encoding", "chunked")
        self.send_header("Connection", "close")
        self.end_headers()
        # A zero or negative DRIP_BYTES would make range() raise or send nothing.
        step = max(1, DRIP_BYTES)
        try:
            for start in range(0, len(html), step):
                # Slice by characters, then size the chunk by its encoded bytes.
                chunk = html[start : start + step].encode("utf-8")
                self.wfile.write(f"{len(chunk):x}\r\n".encode() + chunk + b"\r\n")
                self.wfile.flush()
                time.sleep(DRIP_DELAY)
            # Zero-length chunk terminates the chunked body.
            self.wfile.write(b"0\r\n\r\n")
            self.wfile.flush()
        except ConnectionError:
            # The client went away mid-response; nothing more to send.
            return
class ThreadedHTTPServer(http.server.ThreadingHTTPServer):
    """HTTP server that handles every connection in its own daemon thread."""

    # Daemon threads do not keep the process alive at shutdown.
    daemon_threads = True
if __name__ == "__main__":
    # Script entry point: make sure the cache directory exists, then serve on
    # all interfaces until the process is stopped.
    os.makedirs(CACHE_DIR, exist_ok=True)
    # Using the server as a context manager closes the listening socket on the
    # way out instead of leaving it to interpreter teardown.
    with ThreadedHTTPServer(("0.0.0.0", LISTEN_PORT), PoisonHandler) as server:
        print(f"Poison Fountain service listening on :{LISTEN_PORT}", flush=True)
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is a normal way to stop a foreground run; exit quietly.
            pass

View file

@ -0,0 +1,333 @@
# Name of the TLS secret. Passed through to the tls_secret module and to the
# ingress module in this stack.
variable "tls_secret_name" {
  sensitive = true
  type      = string
}
# NFS server address supplied by the caller.
# NOTE(review): no resource in the visible configuration reads var.nfs_server;
# confirm whether it is still required as an input.
variable "nfs_server" {
  type = string
}
# Namespace that holds every object of this stack.
resource "kubernetes_namespace" "poison_fountain" {
  metadata {
    name = "poison-fountain"

    labels = {
      "istio-injection"  = "disabled"
      tier               = local.tiers.cluster
      "keel.sh/enrolled" = "true"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# Sets up the TLS secret named by var.tls_secret_name inside this namespace.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.poison_fountain.metadata[0].name
}
# NFS-backed volume mounted at /data by both the Deployment and the fetcher
# CronJob (they reference module.nfs_data_host.claim_name).
# NOTE(review): nfs_server is a literal address rather than var.nfs_server;
# presumably deliberate (the volume name ends in "-host") - confirm before
# changing it.
module "nfs_data_host" {
  source = "../../modules/kubernetes/nfs_volume"

  name       = "poison-fountain-data-host"
  namespace  = kubernetes_namespace.poison_fountain.metadata[0].name
  nfs_server = "192.168.1.127"
  nfs_path   = "/srv/nfs/poison-fountain"
}
# ConfigMap for the Python service code.
# The Deployment mounts it read-only at /app and runs /app/server.py.
resource "kubernetes_config_map" "poison_fountain_code" {
  metadata {
    namespace = kubernetes_namespace.poison_fountain.metadata[0].name
    name      = "poison-fountain-code"
  }

  data = {
    "server.py" = file("${path.module}/app/server.py")
  }
}
# ConfigMap for the fetcher script.
# The CronJob mounts it read-only at /scripts and runs /scripts/fetch-poison.sh.
resource "kubernetes_config_map" "poison_fountain_fetcher" {
  metadata {
    namespace = kubernetes_namespace.poison_fountain.metadata[0].name
    name      = "poison-fountain-fetcher"
  }

  data = {
    "fetch-poison.sh" = file("${path.module}/app/fetch-poison.sh")
  }
}
# Main service deployment.
# Runs server.py from the code ConfigMap with the shared data volume at /data.
resource "kubernetes_deployment" "poison_fountain" {
  metadata {
    name      = "poison-fountain"
    namespace = kubernetes_namespace.poison_fountain.metadata[0].name

    labels = {
      app  = "poison-fountain"
      tier = local.tiers.cluster
    }
  }

  spec {
    # Kept at zero replicas: being scaled down clears the
    # ExternalAccessDivergence alert.
    replicas = 0

    strategy {
      type = "RollingUpdate"

      rolling_update {
        max_unavailable = 0
        max_surge       = 1
      }
    }

    selector {
      match_labels = {
        app = "poison-fountain"
      }
    }

    template {
      metadata {
        labels = {
          app = "poison-fountain"
        }
      }

      spec {
        # Spread pods of this app across nodes (by hostname).
        topology_spread_constraint {
          max_skew           = 1
          topology_key       = "kubernetes.io/hostname"
          when_unsatisfiable = "DoNotSchedule"

          label_selector {
            match_labels = {
              app = "poison-fountain"
            }
          }
        }

        container {
          name    = "poison-fountain"
          image   = "python:3.12-slim"
          command = ["python", "/app/server.py"]

          port {
            container_port = 8080
          }

          # Settings read by server.py from its environment
          env {
            name  = "CACHE_DIR"
            value = "/data/cache"
          }
          env {
            name  = "DRIP_BYTES"
            value = "50"
          }
          env {
            name  = "DRIP_DELAY"
            value = "0.5"
          }
          env {
            name  = "POISON_DOMAIN"
            value = "poison.viktorbarzin.me"
          }

          volume_mount {
            name       = "code"
            mount_path = "/app"
            read_only  = true
          }
          volume_mount {
            name       = "data"
            mount_path = "/data"
          }

          # Both probes use the /healthz endpoint on the container port
          liveness_probe {
            http_get {
              path = "/healthz"
              port = 8080
            }
            initial_delay_seconds = 5
            period_seconds        = 30
          }
          readiness_probe {
            http_get {
              path = "/healthz"
              port = 8080
            }
            initial_delay_seconds = 3
            period_seconds        = 10
          }

          resources {
            requests = {
              cpu    = "10m"
              memory = "64Mi"
            }
            limits = {
              memory = "64Mi"
            }
          }
        }

        volume {
          name = "code"
          config_map {
            name = kubernetes_config_map.poison_fountain_code.metadata[0].name
          }
        }
        volume {
          name = "data"
          persistent_volume_claim {
            claim_name = module.nfs_data_host.claim_name
          }
        }
      }
    }
  }

  # Fields changed outside Terraform; ignored so plans stay clean.
  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
      metadata[0].annotations["keel.sh/policy"],
      metadata[0].annotations["keel.sh/trigger"],
      metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
      metadata[0].annotations["keel.sh/match-tag"],
      spec[0].template[0].spec[0].container[0].image, # KEEL_IGNORE_IMAGE Keel manages tag updates
      metadata[0].annotations["kubernetes.io/change-cause"],
      metadata[0].annotations["deployment.kubernetes.io/revision"],
      spec[0].template[0].metadata[0].annotations["keel.sh/update-time"], # KEEL_LIFECYCLE_V1
    ]
  }
}
# Internal service (for ForwardAuth from Traefik).
# Exposes port 8080 of the pods labelled app = "poison-fountain".
resource "kubernetes_service" "poison_fountain" {
  metadata {
    name      = "poison-fountain"
    namespace = kubernetes_namespace.poison_fountain.metadata[0].name

    labels = {
      app = "poison-fountain"
    }
  }

  spec {
    selector = {
      app = "poison-fountain"
    }

    port {
      name        = "http"
      port        = 8080
      target_port = 8080
    }
  }
}
# Public ingress for the poison trap subdomain
# Deliberately NO rate limiting, NO CrowdSec, NO anti-AI (we WANT scrapers here)
# NOTE(review): auth = "required" looks at odds with the intent stated above;
# confirm how ingress_factory interprets this value.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  name            = "poison-fountain"
  namespace       = kubernetes_namespace.poison_fountain.metadata[0].name
  host            = "poison"
  port            = 8080
  dns_type        = "non-proxied"
  tls_secret_name = var.tls_secret_name
  auth            = "required"

  skip_default_rate_limit = true
  exclude_crowdsec        = true
  anti_ai_scraping        = false

  # Deployment is scaled to 0 (see replicas above). Opt the ingress out of
  # Uptime Kuma external monitoring so the sync CronJob deletes the orphaned
  # `[External] poison` monitor instead of flapping DOWN.
  external_monitor = false

  # Homepage dashboard entry
  extra_annotations = {
    "gethomepage.dev/enabled"      = "true"
    "gethomepage.dev/name"         = "Poison Fountain"
    "gethomepage.dev/description"  = "AI bot trap"
    "gethomepage.dev/icon"         = "mdi-shield-alert"
    "gethomepage.dev/group"        = "Other"
    "gethomepage.dev/pod-selector" = ""
  }
}
# CronJob to fetch and cache poisoned content from Poison Fountain.
# Runs fetch-poison.sh from the fetcher ConfigMap against the shared /data volume.
resource "kubernetes_cron_job_v1" "poison_fetcher" {
  metadata {
    name      = "poison-fountain-fetcher"
    namespace = kubernetes_namespace.poison_fountain.metadata[0].name
  }

  spec {
    schedule                      = "0 */6 * * *"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 1
    failed_jobs_history_limit     = 1

    job_template {
      metadata {
        name = "poison-fountain-fetcher"
      }

      spec {
        template {
          metadata {
            name = "poison-fountain-fetcher"
          }

          spec {
            security_context {
              # curlimages/curl defaults to uid 100, but the NFS mount at /data is
              # owned root:root 755 (writes from the main Deployment which runs as
              # root). Align the CronJob with the Deployment so mkdir /data/cache
              # succeeds. no_root_squash is set on the /srv/nfs export.
              run_as_user = 0
            }

            container {
              name    = "fetcher"
              image   = "curlimages/curl:latest"
              command = ["sh", "/scripts/fetch-poison.sh"]

              # Settings read by fetch-poison.sh from its environment
              env {
                name  = "CACHE_DIR"
                value = "/data/cache"
              }
              env {
                name  = "POISON_URL"
                value = "https://rnsaffn.com/poison2/"
              }
              env {
                name  = "FETCH_COUNT"
                value = "50"
              }

              volume_mount {
                name       = "scripts"
                mount_path = "/scripts"
                read_only  = true
              }
              volume_mount {
                name       = "data"
                mount_path = "/data"
              }
            }

            volume {
              name = "scripts"
              config_map {
                name         = kubernetes_config_map.poison_fountain_fetcher.metadata[0].name
                default_mode = "0755"
              }
            }
            volume {
              name = "data"
              persistent_volume_claim {
                claim_name = module.nfs_data_host.claim_name
              }
            }

            restart_policy = "Never"
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [
      spec[0].job_template[0].spec[0].template[0].spec[0].dns_config,
    ]
  }
}
# CI retrigger 2026-05-16T13:42:57+00:00 bulk enrollment apply (pipeline #689 killed)
# CI retrigger v2 2026-05-16T13:46:35+00:00

View file

@ -0,0 +1,37 @@
# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
# Provider sources and version constraints for this stack.
terraform {
  required_providers {
    vault = {
      version = "~> 4.0"
      source  = "hashicorp/vault"
    }

    cloudflare = {
      version = "~> 4"
      source  = "cloudflare/cloudflare"
    }

    authentik = {
      version = "~> 2024.10"
      source  = "goauthentik/authentik"
    }
  }
}
# Path to the kubeconfig file used by the kubernetes and helm providers.
variable "kube_config_path" {
  default = "~/.kube/config"
  type    = string
}
# Kubernetes provider, configured from the kubeconfig at var.kube_config_path.
provider "kubernetes" {
  config_path = var.kube_config_path
}
# Helm provider, pointed at the same kubeconfig as the kubernetes provider.
provider "helm" {
  kubernetes = {
    config_path = var.kube_config_path
  }
}
# Vault provider for the Vault instance at the address below.
provider "vault" {
  skip_child_token = true
  address          = "https://vault.viktorbarzin.me"
}

View file

@ -0,0 +1 @@
../../secrets

View file

@ -0,0 +1,8 @@
# Pull in the shared Terragrunt configuration found in a parent folder.
include "root" {
  path = find_in_parent_folders()
}
# Declares a dependency on the sibling "platform" stack; skip_outputs = true
# means none of its outputs are read here.
dependency "platform" {
  skip_outputs = true
  config_path  = "../platform"
}