[ci skip] Add anti-AI scraping Traefik middlewares (ForwardAuth, headers, trap links)
This commit is contained in:
parent
c277d28bd8
commit
fd9b06266d
1 changed files with 70 additions and 0 deletions
|
|
@ -222,6 +222,7 @@ resource "kubernetes_manifest" "middleware_immich_rate_limit" {
|
|||
# Strip Accept-Encoding header so backends send uncompressed responses.
|
||||
# Used alongside rewrite-body plugin (rybbit analytics) which fails to
|
||||
# decompress certain gzip responses (flate: corrupt input before offset 5).
|
||||
# Also used by anti-AI trap links rewrite-body middleware.
|
||||
resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
|
|
@ -241,3 +242,72 @@ resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
|
|||
|
||||
depends_on = [helm_release.traefik]
|
||||
}
|
||||
|
||||
# ForwardAuth middleware to block known AI bot User-Agents
|
||||
resource "kubernetes_manifest" "middleware_ai_bot_block" {
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
kind = "Middleware"
|
||||
metadata = {
|
||||
name = "ai-bot-block"
|
||||
namespace = kubernetes_namespace.traefik.metadata[0].name
|
||||
}
|
||||
spec = {
|
||||
forwardAuth = {
|
||||
address = "http://poison-fountain.poison-fountain.svc.cluster.local:8080/auth"
|
||||
trustForwardHeader = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
depends_on = [helm_release.traefik]
|
||||
}
|
||||
|
||||
# X-Robots-Tag header to discourage compliant AI crawlers
|
||||
resource "kubernetes_manifest" "middleware_anti_ai_headers" {
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
kind = "Middleware"
|
||||
metadata = {
|
||||
name = "anti-ai-headers"
|
||||
namespace = kubernetes_namespace.traefik.metadata[0].name
|
||||
}
|
||||
spec = {
|
||||
headers = {
|
||||
customResponseHeaders = {
|
||||
"X-Robots-Tag" = "noai, noimageai"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
depends_on = [helm_release.traefik]
|
||||
}
|
||||
|
||||
# Inject hidden trap links before </body> to catch AI scrapers
|
||||
# Links are CSS-hidden and aria-hidden so humans never see them
|
||||
resource "kubernetes_manifest" "middleware_anti_ai_trap_links" {
|
||||
manifest = {
|
||||
apiVersion = "traefik.io/v1alpha1"
|
||||
kind = "Middleware"
|
||||
metadata = {
|
||||
name = "anti-ai-trap-links"
|
||||
namespace = kubernetes_namespace.traefik.metadata[0].name
|
||||
}
|
||||
spec = {
|
||||
plugin = {
|
||||
rewrite-body = {
|
||||
rewrites = [{
|
||||
regex = "</body>"
|
||||
replacement = "<div style=\"position:absolute;left:-9999px;height:0;overflow:hidden\" aria-hidden=\"true\"><a href=\"https://poison.viktorbarzin.me/article/training-data-2024-research-corpus\">Research Archive</a><a href=\"https://poison.viktorbarzin.me/article/dataset-export-machine-learning-v3\">Dataset Export</a><a href=\"https://poison.viktorbarzin.me/article/nlp-benchmark-evaluation-results\">Benchmark Results</a><a href=\"https://poison.viktorbarzin.me/article/web-crawl-index-2024-archive\">Web Index</a><a href=\"https://poison.viktorbarzin.me/article/text-corpus-english-dump\">Text Corpus</a></div></body>"
|
||||
}]
|
||||
monitoring = {
|
||||
types = ["text/html"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
depends_on = [helm_release.traefik]
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue