infra/stacks/f1-stream/files/backend/health.py
Viktor Barzin 51b8081594 f1-stream: add real F1 stream extractors and iframe player support
Add three new extractors (Streamed.pk, DaddyLive, Aceztrims) for live
F1 streams. Extend ExtractedStream model with stream_type/embed_url
fields, skip health checks for embed streams, fix broken Akamai demo
stream, add variant playlist validation, and add iframe player support
in the frontend for embed-type streams.
2026-03-01 14:35:19 +00:00

301 lines
11 KiB
Python

"""Stream health checker - verifies extracted streams are live and responsive.
Performs GET requests against m3u8 URLs to verify they contain valid HLS
playlists (#EXTM3U header), measures response times for quality ranking,
and supports concurrent checking of multiple streams.
"""
import asyncio
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from urllib.parse import urljoin
import httpx
logger = logging.getLogger(__name__)

# Time budget, in seconds, handed to the HTTP client for one health check.
HEALTH_CHECK_TIMEOUT = 10.0

# Upper bound on how much of a playlist is inspected. The #EXTM3U marker and
# the first few tags are all that the checks look at, so 8 KiB is plenty.
MAX_CONTENT_BYTES = 8 * 1024

# Browser-like User-Agent string attached to every health check request.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0.0.0 Safari/537.36"
)
@dataclass
class StreamHealth:
    """Outcome of probing one stream URL."""

    url: str
    is_live: bool
    # Measured in milliseconds; a lower value indicates better quality.
    response_time_ms: int
    # ISO-8601 timestamp in UTC; filled in at construction time when omitted.
    checked_at: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    # Error message for a stream that is not live (empty by default).
    error: str = ""
    # Bits per second when detectable from the playlist (0 by default).
    bitrate: int = 0

    def to_dict(self) -> dict:
        """Return the fields as a plain dictionary for JSON responses."""
        exported = (
            "url",
            "is_live",
            "response_time_ms",
            "checked_at",
            "error",
            "bitrate",
        )
        return {name: getattr(self, name) for name in exported}
def _extract_bitrate(content: str) -> int:
"""Try to extract bitrate from m3u8 playlist content.
Looks for BANDWIDTH= in #EXT-X-STREAM-INF tags. Returns the highest
bitrate found, or 0 if none detected.
"""
max_bitrate = 0
for line in content.splitlines():
if "BANDWIDTH=" in line:
try:
# Parse BANDWIDTH=<number> from the tag
for part in line.split(","):
part = part.strip()
if part.startswith("BANDWIDTH="):
bw = int(part.split("=", 1)[1])
max_bitrate = max(max_bitrate, bw)
except (ValueError, IndexError):
continue
return max_bitrate
class StreamHealthChecker:
    """Background health checker for extracted streams.

    A stream counts as live when a GET on its m3u8 URL returns 200/206, the
    body contains the ``#EXTM3U`` marker and, for master playlists, the first
    variant playlist is reachable. The time taken by the initial request is
    recorded as a quality indicator.
    """

    # Status codes accepted as success: full content or a honoured Range.
    _OK_STATUSES = (200, 206)

    def __init__(self, timeout: float = HEALTH_CHECK_TIMEOUT) -> None:
        """Store the timeout (seconds) passed to every HTTP client created."""
        self._timeout = timeout

    async def check_stream(self, url: str) -> StreamHealth:
        """Check whether a stream URL is live.

        Verification steps:
            1. GET the m3u8 URL (HEAD is not enough, the content is needed),
               reading at most MAX_CONTENT_BYTES of the body.
            2. Require the ``#EXTM3U`` marker in what was read.
            3. Record the response time as a quality indicator.
            4. Extract bitrate info if available.
            5. For a master playlist, require the first variant to respond.

        Args:
            url: The m3u8 stream URL to check.

        Returns:
            StreamHealth with is_live, response_time_ms, checked_at, and
            optional bitrate and error information. Failures are reported
            through the result; ordinary exceptions are not propagated.
        """
        start_time = time.monotonic()
        checked_at = datetime.now(timezone.utc).isoformat()

        def elapsed_ms() -> int:
            return int((time.monotonic() - start_time) * 1000)

        def not_live(
            error: str, response_time_ms: int, bitrate: int = 0
        ) -> StreamHealth:
            return StreamHealth(
                url=url,
                is_live=False,
                response_time_ms=response_time_ms,
                checked_at=checked_at,
                error=error,
                bitrate=bitrate,
            )

        try:
            async with httpx.AsyncClient(
                timeout=self._timeout,
                follow_redirects=True,
                headers={
                    "User-Agent": USER_AGENT,
                    "Accept": "*/*",
                },
            ) as client:
                status_code, content = await self._fetch_prefix(client, url)
                # Taken before the variant check so that master and media
                # playlists are ranked on the same single-request basis.
                response_time_ms = elapsed_ms()

                # Accept 200 (full content) or 206 (partial content)
                if status_code not in self._OK_STATUSES:
                    return not_live(f"HTTP {status_code}", response_time_ms)

                # Verify it's a valid HLS playlist
                if "#EXTM3U" not in content:
                    return not_live(
                        "Response does not contain #EXTM3U header",
                        response_time_ms,
                    )

                bitrate = _extract_bitrate(content)

                # If this is a master playlist, validate at least one variant
                if "#EXT-X-STREAM-INF:" in content:
                    variant_ok = await self._check_first_variant(
                        content, url, client
                    )
                    if not variant_ok:
                        return not_live(
                            "Master playlist OK but variant playlists are unreachable",
                            response_time_ms,
                            bitrate,
                        )

                return StreamHealth(
                    url=url,
                    is_live=True,
                    response_time_ms=response_time_ms,
                    checked_at=checked_at,
                    bitrate=bitrate,
                )
        except httpx.TimeoutException:
            response_time_ms = elapsed_ms()
            logger.debug("Health check timed out for %s", url)
            return not_live("Timeout", response_time_ms)
        except httpx.HTTPError as e:
            response_time_ms = elapsed_ms()
            logger.debug("Health check HTTP error for %s: %s", url, e)
            return not_live(f"HTTP error: {e}", response_time_ms)
        except Exception as e:
            response_time_ms = elapsed_ms()
            logger.exception("Unexpected error during health check for %s", url)
            return not_live(f"Unexpected error: {e}", response_time_ms)

    async def _fetch_prefix(
        self, client: httpx.AsyncClient, url: str
    ) -> tuple[int, str]:
        """GET ``url`` and return its status code and a bounded body prefix.

        A Range header asks the server for only the first MAX_CONTENT_BYTES.
        The response is streamed and reading stops once that many bytes have
        arrived, so a server that ignores Range (or a URL that is really an
        endless media stream) cannot force a full download.

        Args:
            client: An existing httpx client to reuse.
            url: The URL to fetch.

        Returns:
            ``(status_code, text)``. ``text`` is empty when the status is not
            200/206; the body is not read in that case.
        """
        range_header = {"Range": f"bytes=0-{MAX_CONTENT_BYTES - 1}"}
        async with client.stream("GET", url, headers=range_header) as response:
            status_code = response.status_code
            if status_code not in self._OK_STATUSES:
                return status_code, ""

            received = bytearray()
            async for chunk in response.aiter_bytes():
                received.extend(chunk)
                if len(received) >= MAX_CONTENT_BYTES:
                    break

        # HLS playlists are UTF-8; errors="replace" keeps a multi-byte
        # character cut by the size limit from raising.
        text = bytes(received[:MAX_CONTENT_BYTES]).decode(
            "utf-8", errors="replace"
        )
        return status_code, text

    @staticmethod
    def _first_variant_uri(content: str, base_url: str) -> str:
        """Return the absolute URI of the first variant, or "" if none.

        The variant URI is the first non-empty, non-comment line after the
        first ``#EXT-X-STREAM-INF:`` tag. Relative URIs are resolved against
        ``base_url``.
        """
        lines = content.splitlines()
        for index, line in enumerate(lines):
            if not line.strip().startswith("#EXT-X-STREAM-INF:"):
                continue
            for candidate in lines[index + 1:]:
                candidate = candidate.strip()
                if not candidate or candidate.startswith("#"):
                    continue
                if candidate.startswith(("http://", "https://")):
                    return candidate
                return urljoin(base_url, candidate)
        return ""

    async def _check_first_variant(
        self, content: str, base_url: str, client: httpx.AsyncClient
    ) -> bool:
        """Check that the first variant playlist of a master playlist responds.

        Tries HEAD first and falls back to a bounded GET when HEAD returns a
        non-200/206 status or fails with a non-timeout HTTP error. This
        catches streams whose master playlist is valid but whose variant
        playlists are gone (e.g. 404).

        Args:
            content: The master playlist text content.
            base_url: The URL of the master playlist (for resolving relative URIs).
            client: An existing httpx client to reuse.

        Returns:
            True if the first variant is reachable, or if no variant URI
            could be found in ``content``; False otherwise.
        """
        variant_uri = self._first_variant_uri(content, base_url)
        if not variant_uri:
            # No variants found (shouldn't happen if #EXT-X-STREAM-INF was
            # detected); do not fail the stream on that basis.
            return True

        try:
            try:
                resp = await client.head(variant_uri)
                if resp.status_code in self._OK_STATUSES:
                    return True
            except httpx.TimeoutException:
                # A timed-out host will not answer GET any faster; skip the
                # fallback so the worst-case check time is not doubled.
                raise
            except httpx.HTTPError as e:
                # Servers without HEAD support may drop the connection
                # instead of answering; still give GET a chance.
                logger.debug(
                    "Variant HEAD failed for %s: %s", variant_uri, e
                )

            # HEAD might not be supported, try GET
            status_code, _ = await self._fetch_prefix(client, variant_uri)
            if status_code in self._OK_STATUSES:
                return True
            logger.debug(
                "Variant playlist %s returned HTTP %d",
                variant_uri, status_code,
            )
        except Exception as e:
            logger.debug(
                "Variant check failed for %s: %s", variant_uri, e
            )
        # Only the first variant is checked
        return False

    async def check_all(
        self, streams: list[dict],
    ) -> dict[str, StreamHealth]:
        """Check all streams concurrently, return health map keyed by URL.

        Entries without a "url" key are ignored, and each distinct URL is
        checked once even when it appears in several entries.

        Args:
            streams: List of stream dicts (must have a "url" key).

        Returns:
            Dictionary mapping stream URL to its StreamHealth result.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order.
        urls = list(dict.fromkeys(s["url"] for s in streams if "url" in s))
        if not urls:
            return {}

        logger.info("Running health checks on %d stream(s)...", len(urls))

        # Run all checks concurrently
        results = await asyncio.gather(
            *(self.check_stream(url) for url in urls),
            return_exceptions=True,
        )

        health_map: dict[str, StreamHealth] = {}
        for url, result in zip(urls, results):
            # BaseException rather than Exception: a cancelled check is
            # returned as CancelledError, which is not an Exception subclass
            # and must not end up in the map as if it were a StreamHealth.
            if isinstance(result, BaseException):
                logger.error("Health check task failed for %s: %s", url, result)
                health_map[url] = StreamHealth(
                    url=url,
                    is_live=False,
                    response_time_ms=0,
                    error=f"Task error: {result}",
                )
            else:
                health_map[url] = result

        live_count = sum(1 for h in health_map.values() if h.is_live)
        logger.info(
            "Health checks complete: %d/%d streams are live",
            live_count,
            len(health_map),
        )
        return health_map