[ci skip] f1-stream: add extractor framework with demo streams (Phase 3)
- BaseExtractor ABC with health_check method
- ExtractorRegistry with concurrent fan-out extraction
- ExtractionService with in-memory cache and background polling
- DemoExtractor with 3 public HLS test streams
- Adaptive polling: 5min during live sessions, 30min otherwise
- GET /streams, GET /extractors, POST /extract endpoints
This commit is contained in:
parent
461e355a5d
commit
d15337e838
8 changed files with 608 additions and 5 deletions
49
stacks/f1-stream/files/backend/extractors/__init__.py
Normal file
49
stacks/f1-stream/files/backend/extractors/__init__.py
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
"""Stream extraction framework.
|
||||||
|
|
||||||
|
To add a new extractor:
|
||||||
|
1. Create a new file in this package (e.g., my_site.py)
|
||||||
|
2. Subclass BaseExtractor from backend.extractors.base
|
||||||
|
3. Implement site_key, site_name, and extract()
|
||||||
|
4. Import and register it in this file's create_registry() function
|
||||||
|
|
||||||
|
Example:
|
||||||
|
from backend.extractors.my_site import MySiteExtractor
|
||||||
|
registry.register(MySiteExtractor())
|
||||||
|
"""
|
||||||
|
|
||||||
|
from backend.extractors.demo import DemoExtractor
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
from backend.extractors.registry import ExtractorRegistry
|
||||||
|
from backend.extractors.service import ExtractionService
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"ExtractedStream",
|
||||||
|
"ExtractorRegistry",
|
||||||
|
"ExtractionService",
|
||||||
|
"create_registry",
|
||||||
|
"create_extraction_service",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def create_registry() -> ExtractorRegistry:
    """Build the registry that holds every known extractor.

    New extractors are wired in here: import the class at the top of this
    module and add an instance to the tuple below.
    """
    populated = ExtractorRegistry()

    # --- Register extractors below ---
    for extractor in (
        DemoExtractor(),
        # MySiteExtractor(),  # Add new extractors here
    ):
        populated.register(extractor)

    return populated
|
||||||
|
|
||||||
|
|
||||||
|
def create_extraction_service() -> ExtractionService:
    """Return an ExtractionService backed by a fully populated registry.

    Main entry point of the extraction framework; intended to be called
    once while the application starts up.
    """
    return ExtractionService(create_registry())
|
||||||
118
stacks/f1-stream/files/backend/extractors/base.py
Normal file
118
stacks/f1-stream/files/backend/extractors/base.py
Normal file
|
|
@ -0,0 +1,118 @@
|
||||||
|
"""Base class for all site-specific stream extractors."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class BaseExtractor(ABC):
    """Common interface and shared helpers for site-specific extractors.

    Adding a new extractor:
    1. Create a new module in backend/extractors/
    2. Subclass BaseExtractor
    3. Implement site_key, site_name, and extract()
    4. Register it in backend/extractors/__init__.py
    """

    @property
    @abstractmethod
    def site_key(self) -> str:
        """Unique identifier of the site (e.g., 'sportsurge').

        Expected to be lowercase and limited to alphanumerics, hyphens and
        underscores. Serves as the cache key and appears in API responses.
        """

    @property
    @abstractmethod
    def site_name(self) -> str:
        """Human-readable site name (e.g., 'SportSurge').

        Shown in the UI and in API responses.
        """

    @abstractmethod
    async def extract(self) -> list[ExtractedStream]:
        """Discover stream URLs on this site.

        Returns:
            One ExtractedStream per discovered URL. Implementations should
            fill in url, quality and title; site_key, site_name and
            extracted_at are filled in by the registry when left empty.

        Implementations are expected to:
        - Use httpx for HTTP requests
        - Handle their own errors gracefully (log and return an empty list)
        - Set quality whenever the source makes it detectable
        - Give each stream a descriptive title
        """

    async def health_check(self, url: str) -> bool:
        """Probe ``url`` with a HEAD request and judge whether it is a stream.

        The URL is considered healthy when the response status is exactly
        200 and the Content-Type header is either absent, media-like, or at
        least not textual/HTML.

        Args:
            url: The stream URL to probe.

        Returns:
            True if the URL appears to be a live stream; False on a non-200
            status, a text/HTML content type, a timeout, or any other error.
        """
        # Content-Type fragments that mark an HLS playlist or media payload.
        media_markers = (
            "application/vnd.apple.mpegurl",
            "application/x-mpegurl",
            "video/",
            "audio/",
            "octet-stream",
        )

        try:
            async with httpx.AsyncClient(
                timeout=10.0,
                follow_redirects=True,
                headers={"User-Agent": "Mozilla/5.0"},
            ) as client:
                response = await client.head(url)

                status = response.status_code
                if status != 200:
                    logger.debug(
                        "[%s] Health check failed for %s: HTTP %d",
                        self.site_key,
                        url,
                        status,
                    )
                    return False

                content_type = response.headers.get("content-type", "").lower()

                # Some servers omit Content-Type on HEAD, so a missing header
                # is not held against the URL.
                if not content_type:
                    return True

                if any(marker in content_type for marker in media_markers):
                    return True

                # NOTE(review): text/plain is sometimes used for m3u8
                # playlists, yet every text/* type is rejected here --
                # confirm this strictness is intended.
                if "text/" in content_type or "html" in content_type:
                    logger.debug(
                        "[%s] Health check suspect for %s: content-type=%s",
                        self.site_key,
                        url,
                        content_type,
                    )
                    return False

                # Unknown but non-textual content type: give it the benefit
                # of the doubt.
                return True

        except httpx.TimeoutException:
            logger.debug("[%s] Health check timed out for %s", self.site_key, url)
            return False
        except httpx.HTTPError as e:
            logger.debug("[%s] Health check error for %s: %s", self.site_key, url, e)
            return False
        except Exception:
            logger.exception("[%s] Unexpected error during health check for %s", self.site_key, url)
            return False
|
||||||
75
stacks/f1-stream/files/backend/extractors/demo.py
Normal file
75
stacks/f1-stream/files/backend/extractors/demo.py
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
"""Demo extractor - returns hardcoded test streams for framework testing.
|
||||||
|
|
||||||
|
This extractor exists purely for testing the extraction pipeline end-to-end.
|
||||||
|
It does NOT connect to any real streaming site. Disable it in production by
|
||||||
|
removing its registration from __init__.py or setting DEMO_EXTRACTOR_ENABLED=false.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
from backend.extractors.base import BaseExtractor
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Set DEMO_EXTRACTOR_ENABLED=false to disable this extractor
|
||||||
|
DEMO_ENABLED = os.getenv("DEMO_EXTRACTOR_ENABLED", "true").lower() in ("true", "1", "yes")
|
||||||
|
|
||||||
|
|
||||||
|
class DemoExtractor(BaseExtractor):
    """Demo extractor that returns hardcoded test streams.

    Use this to verify the extraction framework works end-to-end without
    needing a real streaming site. The streams are publicly available HLS
    test streams from Apple and others.
    """

    @property
    def site_key(self) -> str:
        """Cache/API identifier of this extractor."""
        return "demo"

    @property
    def site_name(self) -> str:
        """Display name of this extractor."""
        return "Demo (Test Streams)"

    async def extract(self) -> list[ExtractedStream]:
        """Return the hardcoded test streams with fresh liveness flags.

        Returns:
            An empty list when DEMO_ENABLED is false; otherwise the three
            demo streams, each with ``is_live`` set from ``health_check``.
        """
        # Function-scope import: this module does not import asyncio at
        # file level.
        import asyncio

        if not DEMO_ENABLED:
            logger.info("[demo] Demo extractor is disabled via DEMO_EXTRACTOR_ENABLED")
            return []

        logger.info("[demo] Returning demo test streams")

        streams = [
            ExtractedStream(
                url="https://test-streams.mux.dev/x36xhzz/x36xhzz.m3u8",
                site_key=self.site_key,
                site_name=self.site_name,
                quality="720p",
                title="Big Buck Bunny (Test Stream)",
                is_live=False,
            ),
            ExtractedStream(
                url="https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
                site_key=self.site_key,
                site_name=self.site_name,
                quality="1080p",
                title="Apple Bipbop (Test Stream)",
                is_live=False,
            ),
            ExtractedStream(
                url="https://cph-p2p-msl.akamaized.net/hls/live/2000341/test/master.m3u8",
                site_key=self.site_key,
                site_name=self.site_name,
                quality="",
                title="Akamai Live Test Stream",
                is_live=False,
            ),
        ]

        # Probe all URLs concurrently so one slow or dead host does not delay
        # the others; a sequential loop would add the per-URL timeouts
        # together. gather() returns results in argument order, so they pair
        # up with `streams` positionally.
        liveness = await asyncio.gather(
            *(self.health_check(stream.url) for stream in streams)
        )
        for stream, alive in zip(streams, liveness):
            stream.is_live = alive

        return streams
|
||||||
29
stacks/f1-stream/files/backend/extractors/models.py
Normal file
29
stacks/f1-stream/files/backend/extractors/models.py
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
"""Data models for the stream extraction framework."""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExtractedStream:
    """A single stream URL discovered by an extractor."""

    url: str  # The HLS/m3u8 URL
    site_key: str  # Which extractor found it
    site_name: str  # Human-readable name
    quality: str = ""  # e.g., "720p", "1080p", or empty
    title: str = ""  # e.g., "F1 Race Live"
    # ISO-8601 UTC timestamp, captured when the instance is created.
    extracted_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    is_live: bool = False  # Whether it passed health check

    def to_dict(self) -> dict:
        """Serialize to a plain dictionary for JSON responses.

        Returns:
            A new dict with one entry per dataclass field, in declaration
            order.
        """
        # Function-scope import keeps this block independent of the
        # module's import line.
        from dataclasses import asdict

        # Derive the mapping from the field definitions so it cannot drift
        # from the class when a field is added or renamed.
        return asdict(self)
|
||||||
116
stacks/f1-stream/files/backend/extractors/registry.py
Normal file
116
stacks/f1-stream/files/backend/extractors/registry.py
Normal file
|
|
@ -0,0 +1,116 @@
|
||||||
|
"""Central registry for stream extractors."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from backend.extractors.base import BaseExtractor
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractorRegistry:
    """Holds every site extractor and runs them as a group.

    Extractors are stored by site_key. ``extract_all`` fans out to all of
    them concurrently and isolates failures so one broken extractor does
    not affect the rest.
    """

    def __init__(self) -> None:
        # site_key -> extractor instance
        self._extractors: dict[str, BaseExtractor] = {}

    def register(self, extractor: BaseExtractor) -> None:
        """Add an extractor instance to the registry.

        Args:
            extractor: A BaseExtractor subclass instance.

        Raises:
            ValueError: If an extractor with the same site_key is already registered.
        """
        key = extractor.site_key
        if key in self._extractors:
            existing_name = self._extractors[key].site_name
            raise ValueError(
                f"Extractor with site_key '{key}' is already registered "
                f"(existing: {existing_name}, "
                f"new: {extractor.site_name})"
            )
        self._extractors[key] = extractor
        logger.info("Registered extractor: %s (%s)", extractor.site_name, key)

    def get(self, site_key: str) -> BaseExtractor | None:
        """Look up an extractor by site_key.

        Args:
            site_key: The unique identifier of the extractor.

        Returns:
            The matching extractor instance, or None when no extractor is
            registered under that key.
        """
        return self._extractors.get(site_key)

    def list_extractors(self) -> list[dict]:
        """Describe all registered extractors.

        Returns:
            One dict per extractor carrying its site_key and site_name.
        """
        described: list[dict] = []
        for ext in self._extractors.values():
            described.append({"site_key": ext.site_key, "site_name": ext.site_name})
        return described

    @staticmethod
    async def _extract_isolated(extractor: BaseExtractor) -> list[ExtractedStream]:
        """Run one extractor, turning any failure into an empty result."""
        try:
            found = await extractor.extract()
            # Back-fill identity and timestamp fields the extractor left empty.
            stamp = datetime.now(timezone.utc).isoformat()
            for item in found:
                if not item.site_key:
                    item.site_key = extractor.site_key
                if not item.site_name:
                    item.site_name = extractor.site_name
                if not item.extracted_at:
                    item.extracted_at = stamp
            logger.info(
                "[%s] Extracted %d stream(s)", extractor.site_key, len(found)
            )
            return found
        except Exception:
            logger.exception(
                "[%s] Extractor failed during extraction", extractor.site_key
            )
            return []

    async def extract_all(self) -> list[ExtractedStream]:
        """Run every registered extractor concurrently and merge the results.

        Each extractor is isolated: when one raises, the error is logged and
        the streams from the remaining extractors are still returned.

        Returns:
            Combined list of ExtractedStream from all extractors.
        """
        if not self._extractors:
            logger.warning("No extractors registered, nothing to extract")
            return []

        logger.info(
            "Running extraction across %d extractor(s): %s",
            len(self._extractors),
            ", ".join(self._extractors),
        )

        # Fan out: one coroutine per extractor, awaited together.
        per_extractor = await asyncio.gather(
            *(self._extract_isolated(ext) for ext in self._extractors.values())
        )

        # Flatten the per-extractor lists into a single list.
        all_streams: list[ExtractedStream] = [
            item for batch in per_extractor for item in batch
        ]

        logger.info("Extraction complete: %d total stream(s) found", len(all_streams))
        return all_streams
|
||||||
121
stacks/f1-stream/files/backend/extractors/service.py
Normal file
121
stacks/f1-stream/files/backend/extractors/service.py
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
"""Extraction service - manages extraction lifecycle: polling, caching, serving."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
from backend.extractors.registry import ExtractorRegistry
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractionService:
    """Owns the extraction lifecycle: running, caching, and serving results.

    Extraction is meant to run from a background schedule rather than on the
    client request path. Results are kept in memory, keyed by site_key.
    """

    def __init__(self, registry: ExtractorRegistry) -> None:
        self._registry = registry
        # Cache: site_key -> list of ExtractedStream
        self._cache: dict[str, list[ExtractedStream]] = {}
        # ISO timestamp of the most recent run's start; None before any run.
        self._last_run: str | None = None
        self._last_run_stream_count: int = 0

    async def run_extraction(self) -> None:
        """Run all extractors and refresh the cache with their results.

        For every registered extractor the previous cache entry is replaced
        by the new streams, or removed when the extractor returned nothing.
        """
        logger.info("Starting extraction run...")
        started_at = datetime.now(timezone.utc)

        extracted = await self._registry.extract_all()

        # Bucket the flat result list by the site that produced each stream.
        by_site: dict[str, list[ExtractedStream]] = {}
        for item in extracted:
            by_site.setdefault(item.site_key, []).append(item)

        for info in self._registry.list_extractors():
            site_key = info["site_key"]
            fresh = by_site.get(site_key)
            if fresh is None:
                # Extractor returned nothing (site went down, etc.) -
                # drop its stale entry.
                self._cache.pop(site_key, None)
            else:
                self._cache[site_key] = fresh

        self._last_run = started_at.isoformat()
        self._last_run_stream_count = len(extracted)

        duration = (datetime.now(timezone.utc) - started_at).total_seconds()
        logger.info(
            "Extraction run complete: %d stream(s) from %d extractor(s) in %.1fs",
            len(extracted),
            len(by_site),
            duration,
        )

    def get_streams(self) -> list[dict]:
        """Return every cached stream as a flat list of dicts.

        Returns:
            Serialized ExtractedStream dicts from all cache entries.
        """
        return [
            stream.to_dict()
            for site_streams in self._cache.values()
            for stream in site_streams
        ]

    def get_streams_for_session(self, session_type: str) -> list[dict]:
        """Return cached streams for a given session type.

        No filtering happens yet: all cached streams are returned regardless
        of ``session_type``. The method is a hook for future per-session
        filtering.

        Args:
            session_type: The F1 session type (e.g., "race", "qualifying", "fp1").

        Returns:
            List of serialized ExtractedStream dicts.
        """
        # Every stream is currently treated as relevant to any session.
        result = self.get_streams()
        logger.debug(
            "Returning %d stream(s) for session type '%s'",
            len(result),
            session_type,
        )
        return result

    def get_status(self) -> dict:
        """Summarize the service state for the /extractors endpoint.

        Returns:
            A dict with per-extractor cached-stream counts, the total number
            of cached streams, and the timestamp and stream count of the
            last run.
        """
        per_extractor = [
            {
                "site_key": info["site_key"],
                "site_name": info["site_name"],
                "cached_streams": len(self._cache.get(info["site_key"], [])),
            }
            for info in self._registry.list_extractors()
        ]
        total_cached = sum(len(site_streams) for site_streams in self._cache.values())

        return {
            "extractors": per_extractor,
            "total_cached_streams": total_cached,
            "last_run": self._last_run,
            "last_run_stream_count": self._last_run_stream_count,
        }
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
"""F1 Streams - FastAPI backend with schedule service."""
|
"""F1 Streams - FastAPI backend with schedule and stream extraction services."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||||
from apscheduler.triggers.cron import CronTrigger
|
from apscheduler.triggers.cron import CronTrigger
|
||||||
|
from apscheduler.triggers.interval import IntervalTrigger
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
|
||||||
|
from backend.extractors import create_extraction_service
|
||||||
from backend.schedule import ScheduleService
|
from backend.schedule import ScheduleService
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
|
|
@ -16,21 +18,67 @@ logging.basicConfig(
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
schedule_service = ScheduleService()
|
schedule_service = ScheduleService()
|
||||||
|
extraction_service = create_extraction_service()
|
||||||
scheduler = AsyncIOScheduler()
|
scheduler = AsyncIOScheduler()
|
||||||
|
|
||||||
|
|
||||||
async def _scheduled_refresh() -> None:
|
async def _scheduled_refresh() -> None:
|
||||||
"""Callback for APScheduler daily refresh."""
|
"""Callback for APScheduler daily schedule refresh."""
|
||||||
logger.info("Running scheduled schedule refresh...")
|
logger.info("Running scheduled schedule refresh...")
|
||||||
await schedule_service.refresh()
|
await schedule_service.refresh()
|
||||||
|
|
||||||
|
|
||||||
|
async def _scheduled_extraction() -> None:
    """Run a stream extraction, then retune this job's polling interval.

    After extracting, the schedule is scanned for any session whose status
    is "live":
    - a live session exists: the job is rescheduled to every 5 minutes
    - otherwise: the job is rescheduled to every 30 minutes

    The job is only rescheduled when the trigger's current interval differs
    from the desired one.
    """
    logger.info("Running scheduled extraction...")
    await extraction_service.run_extraction()

    # A single live session anywhere in the schedule is enough.
    schedule_data = schedule_service.get_schedule()
    is_live = any(
        session.get("status") == "live"
        for race in schedule_data.get("races", [])
        for session in race.get("sessions", [])
    )

    job = scheduler.get_job("stream_extraction")
    if not job:
        return

    interval_minutes = 5 if is_live else 30
    desired_interval = interval_minutes * 60  # seconds: 300 or 1800

    # `interval_length` is read defensively; a trigger without it yields
    # None, which never equals the desired value and so forces a reschedule.
    current_interval = getattr(job.trigger, "interval_length", None)
    if current_interval == desired_interval:
        return

    scheduler.reschedule_job(
        "stream_extraction",
        trigger=IntervalTrigger(minutes=interval_minutes),
    )
    logger.info(
        "Extraction interval adjusted to %d minutes (live=%s)",
        interval_minutes,
        is_live,
    )
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
"""Startup and shutdown lifecycle handler."""
|
"""Startup and shutdown lifecycle handler."""
|
||||||
# Startup: load schedule and start background scheduler
|
# Startup: load schedule and start background scheduler
|
||||||
await schedule_service.initialize()
|
await schedule_service.initialize()
|
||||||
|
|
||||||
|
# Run initial extraction
|
||||||
|
logger.info("Running initial stream extraction...")
|
||||||
|
await extraction_service.run_extraction()
|
||||||
|
|
||||||
|
# Schedule daily schedule refresh
|
||||||
scheduler.add_job(
|
scheduler.add_job(
|
||||||
_scheduled_refresh,
|
_scheduled_refresh,
|
||||||
trigger=CronTrigger(hour=3, minute=0, timezone="UTC"),
|
trigger=CronTrigger(hour=3, minute=0, timezone="UTC"),
|
||||||
|
|
@ -38,8 +86,18 @@ async def lifespan(app: FastAPI):
|
||||||
name="Refresh F1 schedule daily at 03:00 UTC",
|
name="Refresh F1 schedule daily at 03:00 UTC",
|
||||||
replace_existing=True,
|
replace_existing=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Schedule periodic stream extraction (default: every 30 minutes)
|
||||||
|
scheduler.add_job(
|
||||||
|
_scheduled_extraction,
|
||||||
|
trigger=IntervalTrigger(minutes=30),
|
||||||
|
id="stream_extraction",
|
||||||
|
name="Extract streams from all registered sites",
|
||||||
|
replace_existing=True,
|
||||||
|
)
|
||||||
|
|
||||||
scheduler.start()
|
scheduler.start()
|
||||||
logger.info("APScheduler started - daily refresh at 03:00 UTC")
|
logger.info("APScheduler started - schedule refresh at 03:00 UTC, extraction every 30m")
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
|
|
@ -51,6 +109,9 @@ async def lifespan(app: FastAPI):
|
||||||
app = FastAPI(title="F1 Streams", lifespan=lifespan)
|
app = FastAPI(title="F1 Streams", lifespan=lifespan)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Health & Info ---
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
async def health():
|
async def health():
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
|
|
@ -58,7 +119,10 @@ async def health():
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def root():
|
async def root():
|
||||||
return {"service": "f1-streams", "version": "2.0.1"}
|
return {"service": "f1-streams", "version": "3.0.0"}
|
||||||
|
|
||||||
|
|
||||||
|
# --- Schedule ---
|
||||||
|
|
||||||
|
|
||||||
@app.get("/schedule")
|
@app.get("/schedule")
|
||||||
|
|
@ -74,6 +138,37 @@ async def refresh_schedule():
|
||||||
return {"status": "refreshed"}
|
return {"status": "refreshed"}
|
||||||
|
|
||||||
|
|
||||||
|
# --- Streams & Extraction ---
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/streams")
async def get_streams():
    """Return every cached stream from all extractors, plus the total count."""
    cached = extraction_service.get_streams()
    return {"streams": cached, "count": len(cached)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/extractors")
async def get_extractors():
    """Report the registered extractors and the extraction service status."""
    service_status = extraction_service.get_status()
    return service_status
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/extract")
async def trigger_extraction():
    """Run an extraction across all registered extractors on demand.

    Waits for the run to finish, then reports the number of cached streams
    and the number of registered extractors from the service status.
    """
    await extraction_service.run_extraction()

    snapshot = extraction_service.get_status()
    streams_found = snapshot["total_cached_streams"]
    extractors_run = len(snapshot["extractors"])

    return {
        "status": "extraction_complete",
        "streams_found": streams_found,
        "extractors_run": extractors_run,
    }
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ resource "kubernetes_deployment" "f1-stream" {
|
||||||
}
|
}
|
||||||
spec {
|
spec {
|
||||||
container {
|
container {
|
||||||
image = "viktorbarzin/f1-stream:v2.0.3"
|
image = "viktorbarzin/f1-stream:v3.0.0"
|
||||||
name = "f1-stream"
|
name = "f1-stream"
|
||||||
resources {
|
resources {
|
||||||
limits = {
|
limits = {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue