[ci skip] f1-stream: add stream health checker and HLS proxy (Phases 4-5)
Phase 4 - Stream Health and Fallback:
- StreamHealthChecker with partial GET validation of m3u8 content
- Bitrate extraction from BANDWIDTH tags
- Response time measurement for quality ranking
- Fallback ordering: live first, fastest response time first
- GET /streams now only returns health-verified streams

Phase 5 - HLS Proxy Core:
- GET /proxy?url= - m3u8 playlist fetch with full URI rewriting
- GET /relay?url= - chunked segment relay (never buffers full segment)
- m3u8 rewriter handles master, variant, and segment URIs
- Base64url encoding for URL parameters
- CORS middleware for browser playback
- Range header forwarding for seeking support
This commit is contained in:
parent
a9a4ac37a2
commit
6867036087
6 changed files with 926 additions and 20 deletions
|
|
@ -15,6 +15,9 @@ class ExtractedStream:
|
|||
title: str = "" # e.g., "F1 Race Live"
|
||||
extracted_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
|
||||
is_live: bool = False # Whether it passed health check
|
||||
response_time_ms: int = 0 # Health check response time (lower = better)
|
||||
checked_at: str = "" # ISO timestamp of last health check
|
||||
bitrate: int = 0 # Bitrate in bps if detectable from m3u8 playlist
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Serialize to a plain dictionary for JSON responses."""
|
||||
|
|
@ -26,4 +29,7 @@ class ExtractedStream:
|
|||
"title": self.title,
|
||||
"extracted_at": self.extracted_at,
|
||||
"is_live": self.is_live,
|
||||
"response_time_ms": self.response_time_ms,
|
||||
"checked_at": self.checked_at,
|
||||
"bitrate": self.bitrate,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,19 +1,25 @@
|
|||
"""Extraction service - manages extraction lifecycle: polling, caching, serving."""
|
||||
"""Extraction service - manages extraction lifecycle: polling, caching, health checking, serving."""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from backend.extractors.models import ExtractedStream
|
||||
from backend.extractors.registry import ExtractorRegistry
|
||||
from backend.health import StreamHealthChecker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExtractionService:
|
||||
"""Manages the extraction lifecycle: polling, caching, and serving results.
|
||||
"""Manages the extraction lifecycle: polling, caching, health checking, and serving.
|
||||
|
||||
Extraction runs on a background schedule (via APScheduler), never on
|
||||
client request path. Results are cached in memory, keyed by site_key.
|
||||
client request path. After extraction, health checks verify each stream
|
||||
is live. Results are cached in memory, keyed by site_key.
|
||||
|
||||
GET /streams only returns streams that passed health checks, sorted by:
|
||||
1. is_live (live streams first)
|
||||
2. response_time_ms (fastest first)
|
||||
"""
|
||||
|
||||
def __init__(self, registry: ExtractorRegistry) -> None:
|
||||
|
|
@ -22,18 +28,36 @@ class ExtractionService:
|
|||
self._cache: dict[str, list[ExtractedStream]] = {}
|
||||
self._last_run: str | None = None
|
||||
self._last_run_stream_count: int = 0
|
||||
self._health_checker = StreamHealthChecker()
|
||||
|
||||
async def run_extraction(self) -> None:
|
||||
"""Run all extractors and cache their results.
|
||||
"""Run all extractors, health-check results, and cache them.
|
||||
|
||||
This is called by the background scheduler. Each extractor's
|
||||
results replace its previous cache entry entirely.
|
||||
results replace its previous cache entry entirely. After extraction,
|
||||
health checks are run to verify streams are live and measure
|
||||
response times.
|
||||
"""
|
||||
logger.info("Starting extraction run...")
|
||||
start = datetime.now(timezone.utc)
|
||||
|
||||
streams = await self._registry.extract_all()
|
||||
|
||||
# Run health checks on all extracted streams
|
||||
if streams:
|
||||
stream_dicts = [s.to_dict() for s in streams]
|
||||
health_map = await self._health_checker.check_all(stream_dicts)
|
||||
|
||||
# Update stream objects with health check results
|
||||
for stream in streams:
|
||||
health = health_map.get(stream.url)
|
||||
if health:
|
||||
stream.is_live = health.is_live
|
||||
stream.response_time_ms = health.response_time_ms
|
||||
stream.checked_at = health.checked_at
|
||||
if health.bitrate > 0:
|
||||
stream.bitrate = health.bitrate
|
||||
|
||||
# Group streams by site_key and update cache
|
||||
new_cache: dict[str, list[ExtractedStream]] = {}
|
||||
for stream in streams:
|
||||
|
|
@ -52,29 +76,68 @@ class ExtractionService:
|
|||
self._last_run = start.isoformat()
|
||||
self._last_run_stream_count = len(streams)
|
||||
|
||||
live_count = sum(
|
||||
1 for streams_list in self._cache.values()
|
||||
for s in streams_list if s.is_live
|
||||
)
|
||||
elapsed = (datetime.now(timezone.utc) - start).total_seconds()
|
||||
logger.info(
|
||||
"Extraction run complete: %d stream(s) from %d extractor(s) in %.1fs",
|
||||
"Extraction run complete: %d stream(s) from %d extractor(s) in %.1fs (%d live)",
|
||||
len(streams),
|
||||
len(new_cache),
|
||||
elapsed,
|
||||
live_count,
|
||||
)
|
||||
|
||||
def get_streams(self) -> list[dict]:
    """Return the cached streams that passed health checks, fastest first.

    Streams from every cache entry are merged, filtered to those whose
    ``is_live`` flag is set, and ordered by ``response_time_ms`` ascending.
    Streams with equal response times keep cache iteration order, because
    the sort is stable.

    Returns:
        List of serialized ExtractedStream dicts from all extractors,
        filtered to live-only and sorted by response time.
    """
    # Filter before sorting: non-live streams are never returned to
    # clients, so ordering them first would be wasted work.
    live_streams = [
        stream
        for streams in self._cache.values()
        for stream in streams
        if stream.is_live
    ]

    # Fallback priority among live streams: fastest response first.
    live_streams.sort(key=lambda s: s.response_time_ms)

    return [s.to_dict() for s in live_streams]
|
||||
|
||||
def get_all_streams_unfiltered(self) -> list[dict]:
    """Return every cached stream, including ones that failed health checks.

    Intended for debugging and status endpoints. The result uses the
    fallback ordering (live streams first, then ascending
    ``response_time_ms``) but, unlike the client-facing listing, does not
    drop streams whose ``is_live`` flag is false.

    Returns:
        List of all serialized ExtractedStream dicts.
    """

    def _fallback_priority(stream):
        # False sorts before True, so live streams come first; the
        # response time breaks ties within each group.
        return (not stream.is_live, stream.response_time_ms)

    ordered = sorted(
        (stream for group in self._cache.values() for stream in group),
        key=_fallback_priority,
    )
    return [stream.to_dict() for stream in ordered]
|
||||
|
||||
def get_streams_for_session(self, session_type: str) -> list[dict]:
|
||||
"""Return cached streams filtered/annotated for a specific session type.
|
||||
|
||||
Currently returns all streams (extractors don't yet differentiate by
|
||||
Currently returns all live streams (extractors don't yet differentiate by
|
||||
session type). This method exists as a hook for future filtering,
|
||||
e.g., some extractors might only have race streams but not FP streams.
|
||||
|
||||
|
|
@ -82,7 +145,7 @@ class ExtractionService:
|
|||
session_type: The F1 session type (e.g., "race", "qualifying", "fp1").
|
||||
|
||||
Returns:
|
||||
List of serialized ExtractedStream dicts.
|
||||
List of serialized ExtractedStream dicts (live only, sorted).
|
||||
"""
|
||||
# For now, all streams are potentially relevant to any session.
|
||||
# Future extractors may tag streams with session types, at which
|
||||
|
|
@ -103,19 +166,26 @@ class ExtractionService:
|
|||
for info in extractor_list:
|
||||
key = info["site_key"]
|
||||
cached = self._cache.get(key, [])
|
||||
live_count = sum(1 for s in cached if s.is_live)
|
||||
extractor_statuses.append(
|
||||
{
|
||||
"site_key": key,
|
||||
"site_name": info["site_name"],
|
||||
"cached_streams": len(cached),
|
||||
"live_streams": live_count,
|
||||
}
|
||||
)
|
||||
|
||||
total_cached = sum(len(streams) for streams in self._cache.values())
|
||||
total_live = sum(
|
||||
1 for streams in self._cache.values()
|
||||
for s in streams if s.is_live
|
||||
)
|
||||
|
||||
return {
|
||||
"extractors": extractor_statuses,
|
||||
"total_cached_streams": sum(
|
||||
len(streams) for streams in self._cache.values()
|
||||
),
|
||||
"total_cached_streams": total_cached,
|
||||
"total_live_streams": total_live,
|
||||
"last_run": self._last_run,
|
||||
"last_run_stream_count": self._last_run_stream_count,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue