f1-stream: consume Forgejo-registry image; drop in-monorepo source
The actively-developed f1-stream (infra files/ copy: 12 active extractors +
Playwright/chrome-service verifier) is now its own repo viktor/f1-stream and is
the deployed app (replacing the stale March github build).
- main.tf: image -> forgejo.viktorbarzin.me/viktor/f1-stream:${var.image_tag}
+ image_pull_secrets registry-credentials. Image stays in KEEL_IGNORE_IMAGE.
- Remove stacks/f1-stream/files/ (source now in viktor/f1-stream).
- docs/plans: extraction design + plan pair.
Applied via tg + kubectl set image to forgejo:24857a82; live /health green.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
99f9bf8d89
commit
e8bfb4d06b
51 changed files with 131 additions and 9556 deletions
|
|
@ -1,488 +0,0 @@
|
|||
"""F1 Streams - FastAPI backend with schedule, stream extraction, health checking, HLS proxy, and token refresh."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from fastapi import FastAPI, Query, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
from starlette.responses import Response, StreamingResponse
|
||||
|
||||
from backend.embed_proxy import fetch_embed, relay_asset
|
||||
from backend.extractors import create_extraction_service
|
||||
from backend.proxy import proxy_playlist, relay_stream
|
||||
from backend.schedule import ScheduleService
|
||||
from backend.token_refresh import TokenRefreshManager
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s %(levelname)s %(name)s: %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
schedule_service = ScheduleService()
|
||||
extraction_service = create_extraction_service()
|
||||
token_refresh_manager = TokenRefreshManager(extraction_service)
|
||||
scheduler = AsyncIOScheduler()
|
||||
|
||||
|
||||
# --- Pydantic models for request bodies ---
|
||||
|
||||
|
||||
class ActivateStreamRequest(BaseModel):
|
||||
"""Request body for POST /streams/activate."""
|
||||
|
||||
url: str
|
||||
site_key: str = ""
|
||||
|
||||
|
||||
class DeactivateStreamRequest(BaseModel):
|
||||
"""Request body for POST /streams/deactivate."""
|
||||
|
||||
url: str
|
||||
|
||||
|
||||
# --- Scheduled callbacks ---
|
||||
|
||||
|
||||
async def _scheduled_refresh() -> None:
|
||||
"""Callback for APScheduler daily schedule refresh."""
|
||||
logger.info("Running scheduled schedule refresh...")
|
||||
await schedule_service.refresh()
|
||||
|
||||
|
||||
async def _scheduled_extraction() -> None:
|
||||
"""Callback for APScheduler stream extraction.
|
||||
|
||||
Adjusts its own interval based on whether a session is currently live:
|
||||
- During a live session: reschedule to every 5 minutes
|
||||
- Otherwise: reschedule to every 30 minutes
|
||||
"""
|
||||
logger.info("Running scheduled extraction...")
|
||||
await extraction_service.run_extraction()
|
||||
|
||||
# Check if any session is currently live and adjust polling interval
|
||||
schedule_data = schedule_service.get_schedule()
|
||||
is_live = False
|
||||
for race in schedule_data.get("races", []):
|
||||
for session in race.get("sessions", []):
|
||||
if session.get("status") == "live":
|
||||
is_live = True
|
||||
break
|
||||
if is_live:
|
||||
break
|
||||
|
||||
# Update the extraction job interval based on live status
|
||||
job = scheduler.get_job("stream_extraction")
|
||||
if job:
|
||||
current_interval = getattr(job.trigger, "interval_length", None)
|
||||
desired_interval = 300 if is_live else 1800 # 5 min or 30 min
|
||||
|
||||
if current_interval != desired_interval:
|
||||
interval_minutes = 5 if is_live else 30
|
||||
scheduler.reschedule_job(
|
||||
"stream_extraction",
|
||||
trigger=IntervalTrigger(minutes=interval_minutes),
|
||||
)
|
||||
logger.info(
|
||||
"Extraction interval adjusted to %d minutes (live=%s)",
|
||||
interval_minutes,
|
||||
is_live,
|
||||
)
|
||||
|
||||
|
||||
async def _scheduled_token_refresh() -> None:
|
||||
"""Callback for APScheduler token refresh.
|
||||
|
||||
Only performs work when there are active streams. Re-runs extractors
|
||||
to get fresh CDN tokens for streams being actively watched.
|
||||
"""
|
||||
if not token_refresh_manager.has_active_streams:
|
||||
return
|
||||
|
||||
logger.info("Running scheduled token refresh...")
|
||||
try:
|
||||
await token_refresh_manager.refresh_active_streams()
|
||||
except Exception:
|
||||
logger.exception("Token refresh failed (non-fatal)")
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Startup and shutdown lifecycle handler."""
|
||||
# Startup: load schedule and start background scheduler
|
||||
await schedule_service.initialize()
|
||||
|
||||
# Schedule daily schedule refresh
|
||||
scheduler.add_job(
|
||||
_scheduled_refresh,
|
||||
trigger=CronTrigger(hour=3, minute=0, timezone="UTC"),
|
||||
id="daily_schedule_refresh",
|
||||
name="Refresh F1 schedule daily at 03:00 UTC",
|
||||
replace_existing=True,
|
||||
)
|
||||
|
||||
# Schedule periodic stream extraction (default: every 30 minutes).
|
||||
# next_run_time fires the first run 8s after startup. We don't run
|
||||
# extraction inline here because it calls the playback verifier,
|
||||
# which hits http://127.0.0.1:8000/embed for embed streams — uvicorn
|
||||
# isn't listening yet inside the lifespan startup phase.
|
||||
scheduler.add_job(
|
||||
_scheduled_extraction,
|
||||
trigger=IntervalTrigger(minutes=30),
|
||||
id="stream_extraction",
|
||||
name="Extract streams from all registered sites",
|
||||
replace_existing=True,
|
||||
next_run_time=datetime.now(timezone.utc) + timedelta(seconds=8),
|
||||
)
|
||||
|
||||
# Schedule token refresh every 4 minutes (safe margin for 5-min CDN tokens).
|
||||
# The callback is a no-op when there are no active streams.
|
||||
scheduler.add_job(
|
||||
_scheduled_token_refresh,
|
||||
trigger=IntervalTrigger(minutes=4),
|
||||
id="token_refresh",
|
||||
name="Refresh CDN tokens for active streams",
|
||||
replace_existing=True,
|
||||
)
|
||||
|
||||
scheduler.start()
|
||||
logger.info(
|
||||
"APScheduler started - schedule refresh at 03:00 UTC, extraction every 30m, token refresh every 4m"
|
||||
)
|
||||
|
||||
yield
|
||||
|
||||
# Shutdown
|
||||
scheduler.shutdown(wait=False)
|
||||
logger.info("APScheduler shut down")
|
||||
try:
|
||||
await extraction_service.shutdown()
|
||||
except Exception:
|
||||
logger.exception("extraction_service shutdown failed")
|
||||
|
||||
|
||||
app = FastAPI(title="F1 Streams", lifespan=lifespan)
|
||||
|
||||
# --- CORS Middleware ---
|
||||
# Required for browser-based HLS players to access proxy/relay endpoints
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_methods=["GET", "POST", "OPTIONS"],
|
||||
allow_headers=["Range", "Content-Type"],
|
||||
expose_headers=["Content-Range", "Content-Length", "Content-Type"],
|
||||
)
|
||||
|
||||
|
||||
# --- Health & Info ---
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# --- Schedule ---
|
||||
|
||||
|
||||
@app.get("/schedule")
|
||||
async def get_schedule():
|
||||
"""Return the F1 race schedule for the current season with session statuses."""
|
||||
return schedule_service.get_schedule()
|
||||
|
||||
|
||||
@app.post("/schedule/refresh")
|
||||
async def refresh_schedule():
|
||||
"""Manually trigger a schedule refresh from the jolpica API."""
|
||||
await schedule_service.refresh()
|
||||
return {"status": "refreshed"}
|
||||
|
||||
|
||||
# --- Streams & Extraction ---
|
||||
|
||||
|
||||
@app.get("/streams")
|
||||
async def get_streams():
|
||||
"""Return all currently cached streams that passed health checks.
|
||||
|
||||
Streams are sorted by fallback priority:
|
||||
1. Live streams only (is_live=True)
|
||||
2. Fastest response time first (lowest response_time_ms)
|
||||
"""
|
||||
streams = extraction_service.get_streams()
|
||||
return {
|
||||
"streams": streams,
|
||||
"count": len(streams),
|
||||
}
|
||||
|
||||
|
||||
@app.get("/streams/all")
|
||||
async def get_all_streams():
|
||||
"""Return ALL cached streams including unhealthy ones (for debugging).
|
||||
|
||||
Unlike GET /streams, this endpoint includes streams that failed health
|
||||
checks. Useful for diagnosing extraction or health check issues.
|
||||
"""
|
||||
streams = extraction_service.get_all_streams_unfiltered()
|
||||
return {
|
||||
"streams": streams,
|
||||
"count": len(streams),
|
||||
}
|
||||
|
||||
|
||||
@app.post("/streams/activate")
|
||||
async def activate_stream(body: ActivateStreamRequest):
|
||||
"""Mark a stream as actively being watched.
|
||||
|
||||
When a stream is active, the token refresh manager will periodically
|
||||
re-run the extractor that found it to get fresh CDN tokens before
|
||||
they expire.
|
||||
|
||||
If site_key is not provided, attempts to look it up from the cached
|
||||
streams.
|
||||
|
||||
Body:
|
||||
{"url": "https://...", "site_key": "optional-site-key"}
|
||||
"""
|
||||
url = body.url
|
||||
site_key = body.site_key
|
||||
|
||||
# If site_key not provided, try to look it up from cached streams
|
||||
if not site_key:
|
||||
for streams in extraction_service._cache.values():
|
||||
for stream in streams:
|
||||
if stream.url == url:
|
||||
site_key = stream.site_key
|
||||
break
|
||||
if site_key:
|
||||
break
|
||||
|
||||
if not site_key:
|
||||
return {
|
||||
"status": "error",
|
||||
"detail": "Could not determine site_key for this URL. Provide it explicitly.",
|
||||
}
|
||||
|
||||
token_refresh_manager.mark_stream_active(url, site_key)
|
||||
return {
|
||||
"status": "activated",
|
||||
"url": url,
|
||||
"site_key": site_key,
|
||||
"active_count": len(token_refresh_manager.get_active_streams()),
|
||||
}
|
||||
|
||||
|
||||
@app.post("/streams/deactivate")
|
||||
async def deactivate_stream(body: DeactivateStreamRequest):
|
||||
"""Mark a stream as no longer being watched.
|
||||
|
||||
Stops the token refresh manager from refreshing CDN tokens for this stream.
|
||||
|
||||
Body:
|
||||
{"url": "https://..."}
|
||||
"""
|
||||
token_refresh_manager.mark_stream_inactive(body.url)
|
||||
return {
|
||||
"status": "deactivated",
|
||||
"url": body.url,
|
||||
"active_count": len(token_refresh_manager.get_active_streams()),
|
||||
}
|
||||
|
||||
|
||||
@app.get("/streams/active")
|
||||
async def get_active_streams():
|
||||
"""List currently active streams with their refresh status.
|
||||
|
||||
Returns all streams that are being actively watched, including
|
||||
their current (potentially refreshed) URLs and refresh counts.
|
||||
"""
|
||||
active = token_refresh_manager.get_active_streams()
|
||||
return {
|
||||
"streams": active,
|
||||
"count": len(active),
|
||||
}
|
||||
|
||||
|
||||
@app.get("/extractors")
|
||||
async def get_extractors():
|
||||
"""List registered extractors and their current status."""
|
||||
return extraction_service.get_status()
|
||||
|
||||
|
||||
@app.post("/extract")
|
||||
async def trigger_extraction():
|
||||
"""Manually trigger an extraction run across all registered extractors."""
|
||||
await extraction_service.run_extraction()
|
||||
status = extraction_service.get_status()
|
||||
return {
|
||||
"status": "extraction_complete",
|
||||
"streams_found": status["total_cached_streams"],
|
||||
"live_streams": status["total_live_streams"],
|
||||
"extractors_run": len(status["extractors"]),
|
||||
}
|
||||
|
||||
|
||||
# --- HLS Proxy ---
|
||||
|
||||
|
||||
def _get_proxy_base(request: Request) -> str:
|
||||
"""Derive the proxy base URL from the incoming request.
|
||||
|
||||
Uses X-Forwarded-Proto and X-Forwarded-Host headers if present
|
||||
(behind a reverse proxy), otherwise falls back to request URL.
|
||||
"""
|
||||
proto = request.headers.get("x-forwarded-proto", request.url.scheme)
|
||||
host = request.headers.get("x-forwarded-host", request.url.netloc)
|
||||
return f"{proto}://{host}"
|
||||
|
||||
|
||||
@app.get("/proxy")
|
||||
async def proxy_endpoint(
|
||||
request: Request,
|
||||
url: str = Query(..., description="Base64url-encoded m3u8 playlist URL"),
|
||||
quality: int | None = Query(
|
||||
None,
|
||||
description="0-based quality variant index (0=highest bandwidth). "
|
||||
"Only applies to master playlists.",
|
||||
),
|
||||
):
|
||||
"""Proxy an upstream m3u8 playlist with URI rewriting.
|
||||
|
||||
Fetches the upstream m3u8 playlist, rewrites all URIs to route through
|
||||
our /proxy (for sub-playlists) and /relay (for segments) endpoints,
|
||||
and returns the rewritten playlist.
|
||||
|
||||
The `url` parameter must be base64url-encoded to avoid URL encoding issues.
|
||||
|
||||
If `quality` is specified and the upstream is a master playlist (with
|
||||
multiple quality variants), the proxy will fetch the selected variant's
|
||||
media playlist directly instead of returning the master playlist.
|
||||
Quality index 0 = highest bandwidth, 1 = second highest, etc.
|
||||
|
||||
Examples:
|
||||
GET /proxy?url=aHR0cHM6Ly9leGFtcGxlLmNvbS9zdHJlYW0ubTN1OA
|
||||
GET /proxy?url=aHR0cHM6Ly9leGFtcGxlLmNvbS9zdHJlYW0ubTN1OA&quality=0
|
||||
"""
|
||||
# Check if we have a fresher URL from token refresh
|
||||
fresh_url = token_refresh_manager.get_fresh_url(url)
|
||||
if fresh_url != url:
|
||||
logger.info("Using refreshed URL from token manager")
|
||||
|
||||
proxy_base = _get_proxy_base(request)
|
||||
rewritten = await proxy_playlist(fresh_url, proxy_base, quality=quality)
|
||||
|
||||
return Response(
|
||||
content=rewritten,
|
||||
media_type="application/vnd.apple.mpegurl",
|
||||
headers={
|
||||
"Cache-Control": "no-cache, no-store, must-revalidate",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@app.get("/relay")
|
||||
async def relay_endpoint(
|
||||
request: Request,
|
||||
url: str = Query(..., description="Base64url-encoded segment URL"),
|
||||
):
|
||||
"""Relay an upstream media segment as a chunked byte stream.
|
||||
|
||||
Fetches the upstream segment (TS, fMP4, init segment, etc.) and streams
|
||||
it to the client using chunked transfer encoding. Never buffers the
|
||||
full segment in memory.
|
||||
|
||||
The `url` parameter must be base64url-encoded to avoid URL encoding issues.
|
||||
|
||||
Supports HTTP Range requests for seeking.
|
||||
|
||||
Example:
|
||||
GET /relay?url=aHR0cHM6Ly9leGFtcGxlLmNvbS9zZWdtZW50LnRz
|
||||
"""
|
||||
range_header = request.headers.get("range")
|
||||
|
||||
stream_gen, headers, status_code = await relay_stream(url, range_header)
|
||||
|
||||
return StreamingResponse(
|
||||
stream_gen,
|
||||
status_code=status_code,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
|
||||
# --- Embed iframe-stripping proxy ---
|
||||
|
||||
|
||||
@app.get("/embed")
|
||||
async def embed_proxy(url: str = Query(..., description="Base64url-encoded embed URL")):
|
||||
"""Proxy a third-party embed page so it can be iframed in our origin.
|
||||
|
||||
Strips X-Frame-Options and CSP frame-ancestors from the upstream
|
||||
response, injects a base href + frame-buster-defeat script, and
|
||||
forwards a plausible Referer/Origin to bypass upstream allowlists.
|
||||
"""
|
||||
body, headers, status_code = await fetch_embed(url)
|
||||
return Response(content=body, headers=headers, status_code=status_code)
|
||||
|
||||
|
||||
@app.get("/embed-asset")
|
||||
async def embed_asset(
|
||||
request: Request,
|
||||
url: str = Query(..., description="Base64url-encoded subresource URL"),
|
||||
):
|
||||
"""Relay an upstream subresource (JS/CSS/image/etc.) for the embed proxy.
|
||||
|
||||
Used as a fallback when an upstream blocks hotlinked assets via Origin
|
||||
or Referer checks. Most assets load directly via the injected <base>
|
||||
tag without going through this endpoint.
|
||||
"""
|
||||
range_header = request.headers.get("range")
|
||||
stream_gen, headers, status_code = await relay_asset(url, range_header)
|
||||
return StreamingResponse(stream_gen, headers=headers, status_code=status_code)
|
||||
|
||||
|
||||
# --- Frontend Static Files ---
|
||||
# Mount the SvelteKit static build AFTER all API routes so API endpoints take priority.
|
||||
# SvelteKit adapter-static with ssr=false produces {page}.html files and a fallback index.html.
|
||||
# Starlette StaticFiles(html=True) only checks {path}/index.html, not {path}.html.
|
||||
# We use a catch-all route to handle both patterns and the SPA fallback.
|
||||
_frontend_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), "..", "frontend", "build"))
|
||||
if os.path.exists(_frontend_dir):
|
||||
from starlette.responses import FileResponse, HTMLResponse
|
||||
|
||||
_fallback_path = os.path.join(_frontend_dir, "index.html")
|
||||
|
||||
@app.get("/{path:path}")
|
||||
async def serve_frontend(path: str):
|
||||
"""Serve SvelteKit frontend files with SPA fallback."""
|
||||
for candidate in [
|
||||
os.path.join(_frontend_dir, path),
|
||||
os.path.join(_frontend_dir, f"{path}.html"),
|
||||
os.path.join(_frontend_dir, path, "index.html"),
|
||||
]:
|
||||
real = os.path.realpath(candidate)
|
||||
if real.startswith(_frontend_dir) and os.path.isfile(real):
|
||||
return FileResponse(real)
|
||||
# SPA fallback for client-side routing
|
||||
if os.path.isfile(_fallback_path):
|
||||
return FileResponse(_fallback_path)
|
||||
return Response(content="Not Found", status_code=404)
|
||||
|
||||
logger.info("Serving frontend from %s", _frontend_dir)
|
||||
else:
|
||||
# Fallback root when no frontend build exists
|
||||
@app.get("/")
|
||||
async def root():
|
||||
return {"service": "f1-streams", "version": "5.0.0"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
Loading…
Add table
Add a link
Reference in a new issue