infra/stacks/f1-stream/files/backend/health.py
Viktor Barzin 6867036087 [ci skip] f1-stream: add stream health checker and HLS proxy (Phases 4-5)
Phase 4 - Stream Health and Fallback:
- StreamHealthChecker with partial GET validation of m3u8 content
- Bitrate extraction from BANDWIDTH tags
- Response time measurement for quality ranking
- Fallback ordering: live first, fastest response time first
- GET /streams now only returns health-verified streams

Phase 5 - HLS Proxy Core:
- GET /proxy?url= - m3u8 playlist fetch with full URI rewriting
- GET /relay?url= - chunked segment relay (never buffers full segment)
- m3u8 rewriter handles master, variant, and segment URIs
- Base64url encoding for URL parameters
- CORS middleware for browser playback
- Range header forwarding for seeking support
2026-02-23 23:41:16 +00:00

233 lines
7.9 KiB
Python

"""Stream health checker - verifies extracted streams are live and responsive.
Performs GET requests against m3u8 URLs to verify they contain valid HLS
playlists (#EXTM3U header), measures response times for quality ranking,
and supports concurrent checking of multiple streams.
"""
import asyncio
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
import httpx
# Module-level logger; used by the health checker for debug/info/error output.
logger = logging.getLogger(__name__)
# Default timeout (seconds) for a single health check. It is the default value
# of StreamHealthChecker's ``timeout`` argument, which is handed to the httpx
# client as its timeout setting.
HEALTH_CHECK_TIMEOUT = 10.0
# Maximum amount of playlist content inspected when verifying an m3u8.
# We only need to see the #EXTM3U header and a few lines, so this value is
# also used as the upper bound of the Range request sent to the server.
MAX_CONTENT_BYTES = 8192
# User-Agent header value sent with every health check request
# (a desktop Chrome-on-Windows browser string).
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0.0.0 Safari/537.36"
)
@dataclass
class StreamHealth:
    """Outcome of probing one stream URL.

    Instances are plain value holders; ``to_dict`` turns one into a
    JSON-friendly mapping.
    """

    url: str
    is_live: bool
    response_time_ms: int  # Smaller values indicate a better-quality source
    checked_at: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    error: str = ""  # Populated with a reason when the stream is not live
    bitrate: int = 0  # Bits per second when the playlist advertises it, else 0

    def to_dict(self) -> dict:
        """Return the fields as a plain dictionary suitable for JSON output."""
        # The tuple fixes both the exported keys and their order.
        exported = (
            "url",
            "is_live",
            "response_time_ms",
            "checked_at",
            "error",
            "bitrate",
        )
        return {key: getattr(self, key) for key in exported}
def _extract_bitrate(content: str) -> int:
"""Try to extract bitrate from m3u8 playlist content.
Looks for BANDWIDTH= in #EXT-X-STREAM-INF tags. Returns the highest
bitrate found, or 0 if none detected.
"""
max_bitrate = 0
for line in content.splitlines():
if "BANDWIDTH=" in line:
try:
# Parse BANDWIDTH=<number> from the tag
for part in line.split(","):
part = part.strip()
if part.startswith("BANDWIDTH="):
bw = int(part.split("=", 1)[1])
max_bitrate = max(max_bitrate, bw)
except (ValueError, IndexError):
continue
return max_bitrate
class StreamHealthChecker:
    """Background health checker for extracted streams.

    Verifies streams are live by performing a partial GET on the m3u8 URL,
    checking for valid HLS content (#EXTM3U header), and measuring response
    time as a quality indicator.
    """

    def __init__(self, timeout: float = HEALTH_CHECK_TIMEOUT) -> None:
        """Create a checker.

        Args:
            timeout: Timeout value handed to the httpx client used for each
                health check request.
        """
        self._timeout = timeout

    @staticmethod
    async def _read_head(response: "httpx.Response") -> str:
        """Read at most MAX_CONTENT_BYTES from a streamed response as text.

        Stops pulling data from the network as soon as enough bytes have
        arrived, so a server that ignores the Range header cannot make the
        checker download an arbitrarily large body.
        """
        buffer = bytearray()
        async for chunk in response.aiter_bytes():
            buffer += chunk
            if len(buffer) >= MAX_CONTENT_BYTES:
                break
        # The cut may land inside a multi-byte character, hence "replace".
        return bytes(buffer[:MAX_CONTENT_BYTES]).decode("utf-8", errors="replace")

    async def check_stream(self, url: str) -> StreamHealth:
        """Check if a stream URL is live by doing a partial GET on the m3u8.

        Verification steps:
        1. GET the m3u8 URL (not just HEAD - need to verify playlist content),
           asking for only the first MAX_CONTENT_BYTES via a Range header.
        2. Stream the body and stop after MAX_CONTENT_BYTES, in case the
           server ignored the Range header.
        3. Check that the content contains the #EXTM3U header.
        4. Measure response time as a quality indicator.
        5. Extract bitrate info if available.

        Args:
            url: The m3u8 stream URL to check.

        Returns:
            StreamHealth with is_live, response_time_ms, checked_at, and
            optional bitrate and error information. Failures are reported
            through the returned object (is_live=False plus an error
            message) and are not raised.
        """
        start_time = time.monotonic()
        checked_at = datetime.now(timezone.utc).isoformat()

        def elapsed_ms() -> int:
            # Milliseconds since the check started, on the monotonic clock.
            return int((time.monotonic() - start_time) * 1000)

        def failure(error: str) -> StreamHealth:
            # Shared constructor for every "not live" outcome.
            return StreamHealth(
                url=url,
                is_live=False,
                response_time_ms=elapsed_ms(),
                checked_at=checked_at,
                error=error,
            )

        try:
            async with httpx.AsyncClient(
                timeout=self._timeout,
                follow_redirects=True,
                headers={
                    "User-Agent": USER_AGENT,
                    "Accept": "*/*",
                },
            ) as client:
                # Stream the response so that only the head of the body is
                # read, even when the server answers 200 with full content.
                async with client.stream(
                    "GET",
                    url,
                    headers={"Range": f"bytes=0-{MAX_CONTENT_BYTES - 1}"},
                ) as response:
                    # Accept 200 (full content) or 206 (partial content)
                    if response.status_code not in (200, 206):
                        return failure(f"HTTP {response.status_code}")

                    content = await self._read_head(response)

                    # Verify it's a valid HLS playlist
                    if "#EXTM3U" not in content:
                        return failure(
                            "Response does not contain #EXTM3U header"
                        )

                    return StreamHealth(
                        url=url,
                        is_live=True,
                        response_time_ms=elapsed_ms(),
                        checked_at=checked_at,
                        bitrate=_extract_bitrate(content),
                    )
        except httpx.TimeoutException:
            # Must precede HTTPError: timeouts are a subclass of it.
            logger.debug("Health check timed out for %s", url)
            return failure("Timeout")
        except httpx.HTTPError as e:
            logger.debug("Health check HTTP error for %s: %s", url, e)
            return failure(f"HTTP error: {e}")
        except Exception as e:
            # Boundary handler: a single bad URL must never break a batch.
            logger.exception("Unexpected error during health check for %s", url)
            return failure(f"Unexpected error: {e}")

    async def check_all(
        self, streams: list[dict],
    ) -> dict[str, StreamHealth]:
        """Check all streams concurrently, return health map keyed by URL.

        Each distinct URL is probed once; entries without a "url" key are
        skipped.

        Args:
            streams: List of stream dicts (must have a "url" key).

        Returns:
            Dictionary mapping stream URL to its StreamHealth result.
        """
        # dict.fromkeys drops duplicate URLs while keeping first-seen order;
        # the result is keyed by URL, so repeated probes would be wasted.
        urls = list(dict.fromkeys(s["url"] for s in streams if "url" in s))
        if not urls:
            return {}

        logger.info("Running health checks on %d stream(s)...", len(urls))

        # Run all checks concurrently
        results = await asyncio.gather(
            *(self.check_stream(url) for url in urls),
            return_exceptions=True,
        )

        health_map: dict[str, StreamHealth] = {}
        for url, result in zip(urls, results):
            # BaseException (not Exception) so that a CancelledError handed
            # back by gather() is never stored in place of a StreamHealth.
            if isinstance(result, BaseException):
                logger.error("Health check task failed for %s: %s", url, result)
                health_map[url] = StreamHealth(
                    url=url,
                    is_live=False,
                    response_time_ms=0,
                    error=f"Task error: {result}",
                )
            else:
                health_map[url] = result

        live_count = sum(1 for h in health_map.values() if h.is_live)
        logger.info(
            "Health checks complete: %d/%d streams are live",
            live_count,
            len(health_map),
        )
        return health_map