anubis: re-protect f1 with a per-host policy that allows JSON routes
The earlier f1 revert left the host fully unprotected (no Anubis, and
exclude_crowdsec=true was already set on the ingress). Re-add Anubis with
a custom policy_yaml that:
- ALLOWs /_app/* (SvelteKit immutable JS/CSS chunks loaded before
any cookie exists), /openapi.json, /docs, /api/* (FastAPI meta).
- ALLOWs the 9 known JSON/proxy routes (schedule, streams,
embed, embed-asset, extract, extractors, health, proxy, relay)
so the SvelteKit SPA's XHRs return JSON instead of the challenge
HTML.
- Catch-all CHALLENGE for everything else — the SPA HTML pages
(which fall through to FastAPI's `/{path}` catch-all) get the
PoW gate.
The ALLOWed JSON routes are technically scrapeable by a determined
bot, but the user's stated goal is "avoid accidental scrapes" — the
HTML/SPA is the AI-training target, and that stays gated.
Verified: / → Anubis challenge HTML; /schedule, /streams → JSON;
/_app/.../app.js → text/javascript; ClaudeBot UA → Anubis deny page.
This commit is contained in:
parent
a89d4a7d2a
commit
04cb22fd3b
1 changed files with 43 additions and 5 deletions
|
|
@ -228,18 +228,56 @@ module "tls_secret" {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# NOTE: f1-stream serves its SPA + JSON data endpoints (/schedule, /embed,
|
# f1-stream serves its SvelteKit SPA via the FastAPI `/{path}` catch-all
|
||||||
# /embed-asset, …) all on the same path tree, so putting Anubis in front
|
# and exposes 9 JSON/proxy routes at root (/schedule, /streams, /embed,
|
||||||
# breaks XHR data fetches with "Unexpected token '<', '<!doctype '" — the
|
# /embed-asset, /relay, /proxy, /extract, /extractors, /health). A flat
|
||||||
# challenge HTML lands where JSON is expected. Anubis is removed for f1
|
# Anubis catch-all CHALLENGE breaks the SPA's XHRs with "Unexpected token
|
||||||
# until/unless we add a /api carve-out the way wrongmove does.
|
# '<', '<!doctype '" because the schedule fetch lands on the challenge HTML.
|
||||||
|
# Custom policy: ALLOW the known JSON routes + SvelteKit `_app/` assets
|
||||||
|
# (which load before any user has a chance to solve PoW), CHALLENGE
|
||||||
|
# everything else — the HTML pages.
|
||||||
|
module "anubis" {
|
||||||
|
source = "../../modules/kubernetes/anubis_instance"
|
||||||
|
name = "f1"
|
||||||
|
namespace = kubernetes_namespace.f1-stream.metadata[0].name
|
||||||
|
target_url = "http://${kubernetes_service.f1-stream.metadata[0].name}.${kubernetes_namespace.f1-stream.metadata[0].name}.svc.cluster.local"
|
||||||
|
policy_yaml = <<-EOT
|
||||||
|
bots:
|
||||||
|
- import: (data)/bots/_deny-pathological.yaml
|
||||||
|
- import: (data)/bots/aggressive-brazilian-scrapers.yaml
|
||||||
|
- import: (data)/meta/ai-block-aggressive.yaml
|
||||||
|
- import: (data)/crawlers/_allow-good.yaml
|
||||||
|
- import: (data)/clients/x-firefox-ai.yaml
|
||||||
|
- import: (data)/common/keep-internet-working.yaml
|
||||||
|
# SvelteKit immutable assets (CSS/JS chunks) and OpenAPI/docs/API meta routes —
|
||||||
|
# served pre-cookie, must pass without challenge.
|
||||||
|
- name: f1-svelte-assets-and-meta
|
||||||
|
path_regex: ^/(_app/|openapi\.json|docs|api/)
|
||||||
|
action: ALLOW
|
||||||
|
# Application JSON routes — XHR'd by the SPA after the user has solved
|
||||||
|
# the PoW for `/`. We allow them unconditionally because the alternative
|
||||||
|
# (carve-out per route via separate Ingress objects) is brittle and
|
||||||
|
# because the data they expose (stream URLs, schedule metadata) is not
|
||||||
|
# the AI-scraping target — the HTML/SPA is.
|
||||||
|
- name: f1-data-routes
|
||||||
|
path_regex: ^/(embed|embed-asset|extract|extractors|health|proxy|relay|schedule|streams)(/|\?|$)
|
||||||
|
action: ALLOW
|
||||||
|
- name: catchall-challenge
|
||||||
|
path_regex: .*
|
||||||
|
action: CHALLENGE
|
||||||
|
EOT
|
||||||
|
}
|
||||||
|
|
||||||
module "ingress" {
|
module "ingress" {
|
||||||
source = "../../modules/kubernetes/ingress_factory"
|
source = "../../modules/kubernetes/ingress_factory"
|
||||||
dns_type = "non-proxied"
|
dns_type = "non-proxied"
|
||||||
namespace = kubernetes_namespace.f1-stream.metadata[0].name
|
namespace = kubernetes_namespace.f1-stream.metadata[0].name
|
||||||
name = "f1"
|
name = "f1"
|
||||||
|
service_name = module.anubis.service_name
|
||||||
|
port = module.anubis.service_port
|
||||||
tls_secret_name = var.tls_secret_name
|
tls_secret_name = var.tls_secret_name
|
||||||
exclude_crowdsec = true
|
exclude_crowdsec = true
|
||||||
|
anti_ai_scraping = false
|
||||||
extra_annotations = {
|
extra_annotations = {
|
||||||
"gethomepage.dev/enabled" = "true"
|
"gethomepage.dev/enabled" = "true"
|
||||||
"gethomepage.dev/name" = "F1 Stream"
|
"gethomepage.dev/name" = "F1 Stream"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue