[ci skip] Add anti-AI scraping Traefik middlewares (ForwardAuth, headers, trap links)

This commit is contained in:
Viktor Barzin 2026-02-22 19:49:32 +00:00
parent 178884714f
commit a92fbb8ca5
No known key found for this signature in database
GPG key ID: 0EB088298288D958

View file

@ -222,6 +222,7 @@ resource "kubernetes_manifest" "middleware_immich_rate_limit" {
# Strip Accept-Encoding header so backends send uncompressed responses.
# Used alongside rewrite-body plugin (rybbit analytics) which fails to
# decompress certain gzip responses (flate: corrupt input before offset 5).
# Also used by anti-AI trap links rewrite-body middleware.
resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
@ -241,3 +242,72 @@ resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
depends_on = [helm_release.traefik]
}
# ForwardAuth middleware to block known AI bot User-Agents
resource "kubernetes_manifest" "middleware_ai_bot_block" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "ai-bot-block"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
forwardAuth = {
address = "http://poison-fountain.poison-fountain.svc.cluster.local:8080/auth"
trustForwardHeader = true
}
}
}
depends_on = [helm_release.traefik]
}
# X-Robots-Tag header to discourage compliant AI crawlers
resource "kubernetes_manifest" "middleware_anti_ai_headers" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "anti-ai-headers"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
headers = {
customResponseHeaders = {
"X-Robots-Tag" = "noai, noimageai"
}
}
}
}
depends_on = [helm_release.traefik]
}
# Inject hidden trap links before </body> to catch AI scrapers
# Links are CSS-hidden and aria-hidden so humans never see them
resource "kubernetes_manifest" "middleware_anti_ai_trap_links" {
manifest = {
apiVersion = "traefik.io/v1alpha1"
kind = "Middleware"
metadata = {
name = "anti-ai-trap-links"
namespace = kubernetes_namespace.traefik.metadata[0].name
}
spec = {
plugin = {
rewrite-body = {
rewrites = [{
regex = "</body>"
replacement = "<div style=\"position:absolute;left:-9999px;height:0;overflow:hidden\" aria-hidden=\"true\"><a href=\"https://poison.viktorbarzin.me/article/training-data-2024-research-corpus\">Research Archive</a><a href=\"https://poison.viktorbarzin.me/article/dataset-export-machine-learning-v3\">Dataset Export</a><a href=\"https://poison.viktorbarzin.me/article/nlp-benchmark-evaluation-results\">Benchmark Results</a><a href=\"https://poison.viktorbarzin.me/article/web-crawl-index-2024-archive\">Web Index</a><a href=\"https://poison.viktorbarzin.me/article/text-corpus-english-dump\">Text Corpus</a></div></body>"
}]
monitoring = {
types = ["text/html"]
}
}
}
}
}
depends_on = [helm_release.traefik]
}