[ci skip] f1-stream: add extractor framework with demo streams (Phase 3)

- BaseExtractor ABC with health_check method
- ExtractorRegistry with concurrent fan-out extraction
- ExtractionService with in-memory cache and background polling
- DemoExtractor with 3 public HLS test streams
- Adaptive polling: 5min during live sessions, 30min otherwise
- GET /streams, GET /extractors, POST /extract endpoints
This commit is contained in:
Viktor Barzin 2026-02-23 23:02:56 +00:00
parent 461e355a5d
commit d15337e838
8 changed files with 608 additions and 5 deletions

View file

@ -1,12 +1,14 @@
"""F1 Streams - FastAPI backend with schedule service."""
"""F1 Streams - FastAPI backend with schedule and stream extraction services."""
import logging
from contextlib import asynccontextmanager
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from fastapi import FastAPI
from backend.extractors import create_extraction_service
from backend.schedule import ScheduleService
logging.basicConfig(
@ -16,21 +18,67 @@ logging.basicConfig(
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Service instances shared by the scheduled callbacks and the HTTP endpoints in this module.
schedule_service = ScheduleService()
extraction_service = create_extraction_service()
# Scheduler on which the lifespan handler registers the schedule-refresh and extraction jobs.
scheduler = AsyncIOScheduler()
async def _scheduled_refresh() -> None:
    """Callback for APScheduler daily schedule refresh.

    Logs the run and awaits ``schedule_service.refresh()``.
    """
    logger.info("Running scheduled schedule refresh...")
    await schedule_service.refresh()
async def _scheduled_extraction() -> None:
    """Run one extraction pass, then retune the polling interval.

    After the extraction finishes, the schedule returned by
    ``schedule_service.get_schedule()`` is scanned for any session whose
    ``status`` is ``"live"``. The ``"stream_extraction"`` job is then
    rescheduled to:

    - every 5 minutes while a session is live
    - every 30 minutes otherwise

    Nothing is rescheduled when the job cannot be found or when its trigger
    already uses the wanted interval.
    """
    logger.info("Running scheduled extraction...")
    await extraction_service.run_extraction()

    # A single live session anywhere in the schedule is enough to poll faster.
    schedule = schedule_service.get_schedule()
    live_now = any(
        entry.get("status") == "live"
        for event in schedule.get("races", [])
        for entry in event.get("sessions", [])
    )

    extraction_job = scheduler.get_job("stream_extraction")
    if not extraction_job:
        return

    minutes = 5 if live_now else 30
    wanted_seconds = minutes * 60  # 300 s when live, 1800 s otherwise
    # Triggers without an ``interval_length`` attribute yield None here and
    # are therefore always rescheduled.
    active_seconds = getattr(extraction_job.trigger, "interval_length", None)
    if active_seconds != wanted_seconds:
        scheduler.reschedule_job(
            "stream_extraction",
            trigger=IntervalTrigger(minutes=minutes),
        )
        logger.info(
            "Extraction interval adjusted to %d minutes (live=%s)",
            minutes,
            live_now,
        )
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Startup and shutdown lifecycle handler."""
# Startup: load schedule and start background scheduler
await schedule_service.initialize()
# Run initial extraction
logger.info("Running initial stream extraction...")
await extraction_service.run_extraction()
# Schedule daily schedule refresh
scheduler.add_job(
_scheduled_refresh,
trigger=CronTrigger(hour=3, minute=0, timezone="UTC"),
@ -38,8 +86,18 @@ async def lifespan(app: FastAPI):
name="Refresh F1 schedule daily at 03:00 UTC",
replace_existing=True,
)
# Schedule periodic stream extraction (default: every 30 minutes)
scheduler.add_job(
_scheduled_extraction,
trigger=IntervalTrigger(minutes=30),
id="stream_extraction",
name="Extract streams from all registered sites",
replace_existing=True,
)
scheduler.start()
logger.info("APScheduler started - daily refresh at 03:00 UTC")
logger.info("APScheduler started - schedule refresh at 03:00 UTC, extraction every 30m")
yield
@ -51,6 +109,9 @@ async def lifespan(app: FastAPI):
# FastAPI application object; `lifespan` supplies the startup and shutdown handling.
app = FastAPI(title="F1 Streams", lifespan=lifespan)
# --- Health & Info ---
@app.get("/health")
async def health():
    """Return a fixed payload signalling that the service is running."""
    payload = {"status": "ok"}
    return payload
@ -58,7 +119,10 @@ async def health():
@app.get("/")
async def root():
    """Return the service name and its current version."""
    # Only one return: the superseded "2.0.1" payload used to come first and
    # made the current version unreachable.
    return {"service": "f1-streams", "version": "3.0.0"}
# --- Schedule ---
@app.get("/schedule")
@ -74,6 +138,37 @@ async def refresh_schedule():
return {"status": "refreshed"}
# --- Streams & Extraction ---
@app.get("/streams")
async def get_streams():
    """Return the streams currently cached by the extraction service.

    The response carries the stream list under ``"streams"`` and its length
    under ``"count"``.
    """
    cached = extraction_service.get_streams()
    response = {"streams": cached}
    response["count"] = len(cached)
    return response
@app.get("/extractors")
async def get_extractors():
    """Report the extraction service's status for its registered extractors."""
    status_report = extraction_service.get_status()
    return status_report
@app.post("/extract")
async def trigger_extraction():
    """Run an extraction on demand and summarise the outcome.

    Awaits a full extraction run, then reads the service status to report
    how many streams are cached and how many extractors are listed.
    """
    await extraction_service.run_extraction()
    snapshot = extraction_service.get_status()
    stream_total = snapshot["total_cached_streams"]
    extractor_total = len(snapshot["extractors"])
    return {
        "status": "extraction_complete",
        "streams_found": stream_total,
        "extractors_run": extractor_total,
    }
if __name__ == "__main__":
import uvicorn