[ci skip] Add anti-AI scraping Traefik middlewares (ForwardAuth, headers, trap links)
This commit is contained in:
parent
178884714f
commit
a92fbb8ca5
1 changed files with 70 additions and 0 deletions
|
|
@ -222,6 +222,7 @@ resource "kubernetes_manifest" "middleware_immich_rate_limit" {
|
||||||
# Strip Accept-Encoding header so backends send uncompressed responses.
|
# Strip Accept-Encoding header so backends send uncompressed responses.
|
||||||
# Used alongside rewrite-body plugin (rybbit analytics) which fails to
|
# Used alongside rewrite-body plugin (rybbit analytics) which fails to
|
||||||
# decompress certain gzip responses (flate: corrupt input before offset 5).
|
# decompress certain gzip responses (flate: corrupt input before offset 5).
|
||||||
|
# Also used by anti-AI trap links rewrite-body middleware.
|
||||||
resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
|
resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
|
||||||
manifest = {
|
manifest = {
|
||||||
apiVersion = "traefik.io/v1alpha1"
|
apiVersion = "traefik.io/v1alpha1"
|
||||||
|
|
@ -241,3 +242,72 @@ resource "kubernetes_manifest" "middleware_strip_accept_encoding" {
|
||||||
|
|
||||||
depends_on = [helm_release.traefik]
|
depends_on = [helm_release.traefik]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Traefik Middleware "ai-bot-block".
# Hands every request to the poison-fountain /auth endpoint via ForwardAuth;
# the intent is to reject requests coming from known AI bot User-Agents.
resource "kubernetes_manifest" "middleware_ai_bot_block" {
  manifest = {
    kind       = "Middleware"
    apiVersion = "traefik.io/v1alpha1"

    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "ai-bot-block"
    }

    spec = {
      forwardAuth = {
        # In-cluster auth endpoint that decides whether the request may pass.
        address = "http://poison-fountain.poison-fountain.svc.cluster.local:8080/auth"

        # NOTE(review): this trusts incoming X-Forwarded-* headers as-is.
        # That is only safe when Traefik sits behind a trusted proxy —
        # confirm against the ingress topology.
        trustForwardHeader = true
      }
    }
  }

  # Apply only after the Traefik release exists (presumably because it
  # provides the Middleware CRD — confirm).
  depends_on = [helm_release.traefik]
}
|
||||||
|
|
||||||
|
# Traefik Middleware "anti-ai-headers".
# Adds an X-Robots-Tag response header asking compliant AI crawlers not to
# use the content; crawlers that ignore the header are unaffected.
resource "kubernetes_manifest" "middleware_anti_ai_headers" {
  manifest = {
    kind       = "Middleware"
    apiVersion = "traefik.io/v1alpha1"

    metadata = {
      namespace = kubernetes_namespace.traefik.metadata[0].name
      name      = "anti-ai-headers"
    }

    spec = {
      headers = {
        # Set on every response routed through this middleware.
        customResponseHeaders = {
          "X-Robots-Tag" = "noai, noimageai"
        }
      }
    }
  }

  # Apply only after the Traefik release exists (presumably because it
  # provides the Middleware CRD — confirm).
  depends_on = [helm_release.traefik]
}
|
||||||
|
|
||||||
|
# Traefik Middleware "anti-ai-trap-links".
# Uses the rewrite-body plugin to inject hidden trap links immediately before
# </body> in text/html responses, to lure AI scrapers that follow every href.
#
# The links are hidden from humans three ways:
#   - CSS moves the wrapper off-screen and collapses it,
#   - aria-hidden="true" removes it from the accessibility tree,
#   - tabindex="-1" on each anchor takes it out of the keyboard tab order.
# Without the tabindex, keyboard users would tab onto invisible links, because
# aria-hidden alone does not make descendants unfocusable.
resource "kubernetes_manifest" "middleware_anti_ai_trap_links" {
  manifest = {
    apiVersion = "traefik.io/v1alpha1"
    kind       = "Middleware"

    metadata = {
      name      = "anti-ai-trap-links"
      namespace = kubernetes_namespace.traefik.metadata[0].name
    }

    spec = {
      plugin = {
        rewrite-body = {
          rewrites = [{
            regex = "</body>"

            # Built with join() purely for readability; the result is a single
            # string with no separators between the fragments.
            replacement = join("", [
              "<div style=\"position:absolute;left:-9999px;height:0;overflow:hidden\" aria-hidden=\"true\">",
              "<a href=\"https://poison.viktorbarzin.me/article/training-data-2024-research-corpus\" tabindex=\"-1\">Research Archive</a>",
              "<a href=\"https://poison.viktorbarzin.me/article/dataset-export-machine-learning-v3\" tabindex=\"-1\">Dataset Export</a>",
              "<a href=\"https://poison.viktorbarzin.me/article/nlp-benchmark-evaluation-results\" tabindex=\"-1\">Benchmark Results</a>",
              "<a href=\"https://poison.viktorbarzin.me/article/web-crawl-index-2024-archive\" tabindex=\"-1\">Web Index</a>",
              "<a href=\"https://poison.viktorbarzin.me/article/text-corpus-english-dump\" tabindex=\"-1\">Text Corpus</a>",
              "</div></body>",
            ])
          }]

          # Only rewrite HTML responses; other content types pass through.
          monitoring = {
            types = ["text/html"]
          }
        }
      }
    }
  }

  # Apply only after the Traefik release exists (presumably because it
  # provides the Middleware CRD and the plugin — confirm).
  depends_on = [helm_release.traefik]
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue