f1-stream: only show streams confirmed playable by headless browser
Cuts the stream list from 23 mostly-broken entries to ~6 confirmed-playable ones, and adds an iframe-stripping proxy so embed sources (hmembeds, etc.) load through our origin without X-Frame-Options / CSP / JS frame-buster blocks. Why: the previous list was dominated by Discord-shared news article URLs, hardcoded aggregator landing pages, and other non-stream URLs that all sat at is_live=true because embed streams skipped the health check entirely. Users could not tell which links would actually play. What: - backend/playback_verifier.py: new headless-Chromium verifier (Playwright) that polls each candidate stream for a codec-independent "playable" signal (hls.js MANIFEST_PARSED for m3u8; <video>/player div for embed). Replaces the unconditional is_live=True for embed streams in service.py. - backend/embed_proxy.py: new /embed and /embed-asset routes that fetch upstream embed pages, strip X-Frame-Options/CSP/Set-Cookie, and inject a <base href> + frame-buster-defeat <script> that locks down window.top, document.referrer, console.clear/table, and window.location so the hmembeds disable-devtool.js redirect-to-google trap can't fire. - extractors/curated.py: new always-on extractor with two known-good 24/7 hmembeds embeds (Sky Sports F1, DAZN F1) so the list isn't empty between race weekends. - extractors/__init__.py: register CuratedExtractor first; drop FallbackExtractor (its 10 aggregator landing-pages can't iframe-play). - extractors/discord_source.py: positive-match path filter (must look like /embed/, /stream, /watch, /live, /player, *.m3u8, *.php) plus expanded domain blocklist for news sites — was 10 noise URLs, now ~1. - extractors/service.py: run_extraction now health-checks AND verifier-checks both stream types; only verified-playable streams reach is_live. - main.py: register /embed + /embed-asset routes; defer initial extraction by 8s so the verifier can reach the local /embed proxy on 127.0.0.1:8000. 
- frontend/lib/api.js + watch/+page.svelte: route embed iframes through /embed proxy instead of the upstream URL, so X-Frame-Options/CSP can't block them. - Dockerfile: install Playwright chromium + system codec-runtime libs. - main.tf: bump pod memory 256Mi → 1Gi for chromium. Verified end-to-end with Playwright against https://f1.viktorbarzin.me/watch — 6/6 streams reach a player UI; the 3 demo m3u8s actually play (codec-bearing browser); the 3 embeds (Sky Sports F1, DAZN F1, sportsurge) render iframes through the proxy. Image: viktorbarzin/f1-stream:v6.0.5 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
8b180f7662
commit
f90d79ed4e
15 changed files with 2128 additions and 22 deletions
|
|
@ -14,9 +14,26 @@ FROM python:3.13-slim-bookworm
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Headless Chromium runtime libs for the playback verifier. Listed inline
|
||||||
|
# (instead of running `playwright install-deps`) so the image build doesn't
|
||||||
|
# need root-network apt fetches at runtime.
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
ca-certificates \
|
||||||
|
libnss3 libnspr4 \
|
||||||
|
libatk1.0-0 libatk-bridge2.0-0 libcups2 \
|
||||||
|
libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 \
|
||||||
|
libxfixes3 libxrandr2 libgbm1 libpango-1.0-0 libcairo2 \
|
||||||
|
libasound2 libatspi2.0-0 \
|
||||||
|
fonts-liberation fonts-noto-color-emoji \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
COPY backend/requirements.txt .
|
COPY backend/requirements.txt .
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Install the Chromium browser binary used by the verifier. Skip
|
||||||
|
# --with-deps because we already installed the system libs above.
|
||||||
|
RUN playwright install chromium
|
||||||
|
|
||||||
COPY backend/ ./backend/
|
COPY backend/ ./backend/
|
||||||
|
|
||||||
# Copy built frontend into the image
|
# Copy built frontend into the image
|
||||||
|
|
|
||||||
302
stacks/f1-stream/files/backend/embed_proxy.py
Normal file
302
stacks/f1-stream/files/backend/embed_proxy.py
Normal file
|
|
@ -0,0 +1,302 @@
|
||||||
|
"""Embed iframe-stripping reverse proxy.
|
||||||
|
|
||||||
|
Serves third-party embed pages (e.g. https://hmembeds.one/embed/{hash},
|
||||||
|
https://pooembed.eu/embed/{slug}) through our origin so we can:
|
||||||
|
|
||||||
|
1. Strip X-Frame-Options and Content-Security-Policy: frame-ancestors headers,
|
||||||
|
so the embed loads in our <iframe> regardless of upstream policy.
|
||||||
|
2. Inject <base> + a frame-buster-defeat <script> at the top of <head> so
|
||||||
|
the embed's JS sees `window.top === window` and a plausible
|
||||||
|
`document.referrer` pointing at the upstream origin.
|
||||||
|
3. Forward Referer / User-Agent matching the upstream's own pages so
|
||||||
|
the upstream's hotlink / origin-allowlist checks pass.
|
||||||
|
|
||||||
|
Two endpoints:
|
||||||
|
- GET /embed?url=<base64url> — the embed HTML page (rewritten).
|
||||||
|
- GET /embed-asset?url=<base64url> — fallback for any subresource the
|
||||||
|
upstream blocks based on hotlink protection. Most assets load directly
|
||||||
|
via the injected <base> tag and bypass our proxy.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import AsyncGenerator
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
from backend.m3u8_rewriter import decode_url
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Timeout passed to the httpx client that fetches the embed HTML page.
EMBED_TIMEOUT = 20.0
# Timeout passed to the httpx client that relays embed subresources.
ASSET_TIMEOUT = 30.0
# chunk_size handed to `aiter_bytes` when streaming a relayed asset.
RELAY_CHUNK_SIZE = 65536

# Browser-like User-Agent sent on every upstream request made by this module.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0.0.0 Safari/537.36"
)

# Response headers we never forward (they break frame embedding or leak upstream policy).
# Entries must be lower-case: `_filter_headers` compares them against `name.lower()`.
STRIP_RESPONSE_HEADERS = {
    "x-frame-options",
    "content-security-policy",
    "content-security-policy-report-only",
    "set-cookie",
    "report-to",
    "nel",
    "permissions-policy",
    "cross-origin-opener-policy",
    "cross-origin-embedder-policy",
    "cross-origin-resource-policy",
    # let httpx/uvicorn re-set these
    "transfer-encoding",
    "content-encoding",
    "content-length",
    "connection",
}
|
||||||
|
|
||||||
|
# Inject this <script> at the top of <head> to defeat JS frame-busters.
|
||||||
|
# - Locks window.top, window.parent, and window.self to the embed window
|
||||||
|
# itself, so `self !== window.top` checks pass.
|
||||||
|
# - Forces document.referrer to the upstream origin so allowlist checks
|
||||||
|
# like `document.referrer.includes("timstreams.net")` keep working.
|
||||||
|
# - No-ops anything that would call window.parent.location or attempt to
|
||||||
|
# reload the top frame.
|
||||||
|
_FRAME_BUSTER_DEFEAT_TEMPLATE = """
|
||||||
|
<script>(function(){{
|
||||||
|
try {{
|
||||||
|
var fakeWindow = window;
|
||||||
|
Object.defineProperty(window, 'top', {{get: function(){{return fakeWindow;}}, configurable: false}});
|
||||||
|
Object.defineProperty(window, 'parent', {{get: function(){{return fakeWindow;}}, configurable: false}});
|
||||||
|
Object.defineProperty(window, 'frameElement', {{get: function(){{return null;}}, configurable: false}});
|
||||||
|
Object.defineProperty(document, 'referrer', {{get: function(){{return {referrer!r};}}, configurable: false}});
|
||||||
|
}} catch (e) {{}}
|
||||||
|
// Defeat the `disable-devtool.js` redirect trap that hmembeds and similar
|
||||||
|
// embed hosts use. The trap fires `console.clear`/`console.table` in a
|
||||||
|
// tight loop, then if it thinks DevTools is open, calls
|
||||||
|
// `window.location = "https://www.google.com"`. We block those redirect
|
||||||
|
// sinks while leaving normal playback unaffected.
|
||||||
|
try {{
|
||||||
|
var noop = function(){{}};
|
||||||
|
console.clear = noop;
|
||||||
|
console.table = noop;
|
||||||
|
console.dir = noop;
|
||||||
|
var loc = window.location;
|
||||||
|
Object.defineProperty(window, 'location', {{
|
||||||
|
get: function(){{ return loc; }},
|
||||||
|
set: function(v){{ /* swallow assignment */ }},
|
||||||
|
configurable: false,
|
||||||
|
}});
|
||||||
|
var origAssign = loc.assign && loc.assign.bind(loc);
|
||||||
|
var origReplace = loc.replace && loc.replace.bind(loc);
|
||||||
|
loc.assign = function(u){{ if (typeof u === 'string' && u.indexOf('google.com') !== -1) return; if (origAssign) origAssign(u); }};
|
||||||
|
loc.replace = function(u){{ if (typeof u === 'string' && u.indexOf('google.com') !== -1) return; if (origReplace) origReplace(u); }};
|
||||||
|
}} catch (e) {{}}
|
||||||
|
}})();</script>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _decode(encoded_url: str) -> str:
|
||||||
|
try:
|
||||||
|
return decode_url(encoded_url)
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Invalid encoded URL: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_headers(upstream_headers: httpx.Headers) -> dict[str, str]:
|
||||||
|
"""Forward upstream headers minus the ones we strip."""
|
||||||
|
out: dict[str, str] = {}
|
||||||
|
for k, v in upstream_headers.items():
|
||||||
|
if k.lower() in STRIP_RESPONSE_HEADERS:
|
||||||
|
continue
|
||||||
|
out[k] = v
|
||||||
|
# Always allow our domain to embed and load cross-origin
|
||||||
|
out["Access-Control-Allow-Origin"] = "*"
|
||||||
|
out["X-Frame-Options-Stripped"] = "by-f1-embed-proxy"
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _make_referer(upstream_url: str) -> str:
|
||||||
|
"""Build a plausible Referer header — the upstream's own root."""
|
||||||
|
parsed = urlparse(upstream_url)
|
||||||
|
return f"{parsed.scheme}://{parsed.netloc}/"
|
||||||
|
|
||||||
|
|
||||||
|
def _make_origin(upstream_url: str) -> str:
|
||||||
|
parsed = urlparse(upstream_url)
|
||||||
|
return f"{parsed.scheme}://{parsed.netloc}"
|
||||||
|
|
||||||
|
|
||||||
|
def _inject_into_head(html: str, upstream_url: str) -> str:
|
||||||
|
"""Inject <base> tag + frame-buster defeat script into the response HTML."""
|
||||||
|
parsed = urlparse(upstream_url)
|
||||||
|
base_href = f"{parsed.scheme}://{parsed.netloc}/"
|
||||||
|
|
||||||
|
# The frame-buster-defeat script. Use the upstream's own URL as the spoofed referrer.
|
||||||
|
busted = _FRAME_BUSTER_DEFEAT_TEMPLATE.format(referrer=upstream_url)
|
||||||
|
|
||||||
|
base_tag = f'<base href="{base_href}">'
|
||||||
|
|
||||||
|
injection = base_tag + busted
|
||||||
|
|
||||||
|
# Drop any inline CSP <meta> tags first so they can't override our header strip.
|
||||||
|
html = re.sub(
|
||||||
|
r'<meta[^>]+http-equiv=[\'"]?Content-Security-Policy[\'"]?[^>]*>',
|
||||||
|
"",
|
||||||
|
html,
|
||||||
|
flags=re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Strip disable-devtool.js script tags. The library runs detection heuristics
|
||||||
|
# and redirects on match. Removing it reduces attack surface even with our
|
||||||
|
# location-setter lockdown — saves redundant work and one fewer thing to
|
||||||
|
# bypass in case the lockdown misses an edge case.
|
||||||
|
html = re.sub(
|
||||||
|
r'<script[^>]+(?:disable-devtool|devtool|disabledevtool)[^<]*</script>',
|
||||||
|
"",
|
||||||
|
html,
|
||||||
|
flags=re.IGNORECASE,
|
||||||
|
)
|
||||||
|
html = re.sub(
|
||||||
|
r'<script[^>]+src=["\'][^"\']*disable-devtool[^"\']*["\'][^>]*></script>',
|
||||||
|
"",
|
||||||
|
html,
|
||||||
|
flags=re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Insert immediately after the opening <head> (case-insensitive).
|
||||||
|
head_match = re.search(r"<head[^>]*>", html, flags=re.IGNORECASE)
|
||||||
|
if head_match:
|
||||||
|
idx = head_match.end()
|
||||||
|
return html[:idx] + injection + html[idx:]
|
||||||
|
|
||||||
|
# No <head> — prepend at the start of the document so the script runs first.
|
||||||
|
return injection + html
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_blocked_by_anti_bot(content: str) -> bool:
|
||||||
|
"""Detect Cloudflare-style challenge interstitials in the upstream body."""
|
||||||
|
sample = content[:4096].lower()
|
||||||
|
markers = (
|
||||||
|
"cf-chl-bypass",
|
||||||
|
"checking your browser",
|
||||||
|
"just a moment",
|
||||||
|
"attention required",
|
||||||
|
"cf-browser-verification",
|
||||||
|
)
|
||||||
|
return any(m in sample for m in markers)
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_embed(encoded_url: str) -> tuple[bytes, dict[str, str], int]:
    """Fetch an upstream embed page, rewrite the HTML, and return the response.

    HTML responses are checked for an anti-bot interstitial, rewritten by
    `_inject_into_head`, and re-encoded as UTF-8. Non-HTML bodies are passed
    through unchanged. The upstream status code is returned as-is.

    Returns: (body_bytes, headers_dict, status_code).

    Raises:
        HTTPException: 400 for an undecodable or malformed URL, 504 on
            upstream timeout, 502 on other transport errors or when the
            upstream answers with an anti-bot challenge.
    """
    url = _decode(encoded_url)
    # NOTE(review): `url` comes from the request and is fetched server-side
    # without any host restriction visible in this module. Confirm the route
    # is not reachable by untrusted callers, or add a host allowlist (SSRF).
    logger.info("Embed-proxying: %s", url)

    upstream_headers = {
        "User-Agent": USER_AGENT,
        "Referer": _make_referer(url),
        "Origin": _make_origin(url),
        "Accept": (
            "text/html,application/xhtml+xml,application/xml;q=0.9,"
            "image/avif,image/webp,*/*;q=0.8"
        ),
        "Accept-Language": "en-US,en;q=0.9",
    }

    try:
        async with httpx.AsyncClient(
            timeout=EMBED_TIMEOUT,
            follow_redirects=True,
        ) as client:
            response = await client.get(url, headers=upstream_headers)
    except httpx.InvalidURL as e:
        # InvalidURL is not an httpx.HTTPError; without this branch a
        # malformed URL would surface as an unhandled 500.
        raise HTTPException(status_code=400, detail=f"Invalid embed URL: {e}") from e
    except httpx.TimeoutException as e:
        raise HTTPException(status_code=504, detail="Upstream embed timeout") from e
    except httpx.HTTPError as e:
        raise HTTPException(status_code=502, detail=f"Upstream embed error: {e}") from e

    status_code = response.status_code
    upstream_ct = response.headers.get("content-type", "")
    headers_out = _filter_headers(response.headers)

    body = response.content

    # Detect Cloudflare-style challenge so the frontend can show a clear error.
    if "html" in upstream_ct.lower():
        text = response.text
        if _looks_blocked_by_anti_bot(text):
            logger.warning("Upstream returned anti-bot challenge: %s", url)
            raise HTTPException(
                status_code=502,
                detail="Upstream returned anti-bot challenge — proxy cannot bypass",
            )

        rewritten = _inject_into_head(text, url)
        body = rewritten.encode("utf-8")
        # The body is now UTF-8 regardless of the upstream charset. Remove the
        # forwarded content-type under any casing before setting ours, so the
        # response never carries two Content-Type headers.
        for name in [k for k in headers_out if k.lower() == "content-type"]:
            del headers_out[name]
        headers_out["Content-Type"] = "text/html; charset=utf-8"

    return body, headers_out, status_code
|
||||||
|
|
||||||
|
|
||||||
|
async def relay_asset(
    encoded_url: str, range_header: str | None
) -> tuple[AsyncGenerator[bytes, None], dict[str, str], int]:
    """Relay an upstream subresource (JS/CSS/image/font) as a chunked stream.

    Used as a fallback when an upstream blocks hotlinked assets via Referer
    or Origin checks. The injected <base> tag handles most of these cases
    by letting the browser hit upstream directly — the relay is only for
    the awkward few that need a proxied origin.

    Args:
        encoded_url: encoded upstream URL, decoded via `_decode`.
        range_header: value of the client's Range header, forwarded
            upstream when truthy.

    Returns:
        (chunk_generator, headers_dict, status_code). The generator owns the
        upstream response and client and closes both when it finishes or is
        closed after being started.

    Raises:
        HTTPException: 400 for an undecodable or malformed URL, 504 on
            upstream timeout, 502 on transport errors or an upstream
            status >= 400.
    """
    url = _decode(encoded_url)
    logger.debug("Embed-asset relay: %s", url)

    headers = {
        "User-Agent": USER_AGENT,
        "Referer": _make_referer(url),
        "Origin": _make_origin(url),
        "Accept": "*/*",
    }
    if range_header:
        headers["Range"] = range_header

    # Not an `async with`: the client must outlive this function so the
    # returned generator can keep streaming. Every failure path below is
    # therefore responsible for closing it.
    client = httpx.AsyncClient(timeout=ASSET_TIMEOUT, follow_redirects=True)

    try:
        response = await client.send(
            client.build_request("GET", url, headers=headers),
            stream=True,
        )
    except httpx.InvalidURL as e:
        # InvalidURL is not an httpx.HTTPError; treat it as a client error.
        await client.aclose()
        raise HTTPException(status_code=400, detail=f"Invalid asset URL: {e}") from e
    except httpx.TimeoutException as e:
        await client.aclose()
        raise HTTPException(status_code=504, detail="Upstream asset timeout") from e
    except httpx.HTTPError as e:
        await client.aclose()
        raise HTTPException(status_code=502, detail=f"Upstream asset error: {e}") from e
    except BaseException:
        # Anything else (including task cancellation) must not leak the client.
        await client.aclose()
        raise

    if response.status_code >= 400:
        await response.aclose()
        await client.aclose()
        raise HTTPException(
            status_code=502,
            detail=f"Upstream asset returned HTTP {response.status_code}",
        )

    headers_out = _filter_headers(response.headers)

    async def _stream() -> AsyncGenerator[bytes, None]:
        # The cleanup only runs once iteration has started; callers are
        # expected to consume or close the generator they receive.
        try:
            async for chunk in response.aiter_bytes(chunk_size=RELAY_CHUNK_SIZE):
                yield chunk
        finally:
            await response.aclose()
            await client.aclose()

    return _stream(), headers_out, response.status_code
|
||||||
|
|
@ -12,12 +12,17 @@ Example:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from backend.extractors.aceztrims import AceztrimsExtractor
|
from backend.extractors.aceztrims import AceztrimsExtractor
|
||||||
|
from backend.extractors.curated import CuratedExtractor
|
||||||
from backend.extractors.daddylive import DaddyLiveExtractor
|
from backend.extractors.daddylive import DaddyLiveExtractor
|
||||||
from backend.extractors.demo import DemoExtractor
|
from backend.extractors.demo import DemoExtractor
|
||||||
|
from backend.extractors.discord_source import DiscordExtractor
|
||||||
from backend.extractors.models import ExtractedStream
|
from backend.extractors.models import ExtractedStream
|
||||||
|
from backend.extractors.pitsport import PitsportExtractor
|
||||||
|
from backend.extractors.ppv import PPVExtractor
|
||||||
from backend.extractors.registry import ExtractorRegistry
|
from backend.extractors.registry import ExtractorRegistry
|
||||||
from backend.extractors.service import ExtractionService
|
from backend.extractors.service import ExtractionService
|
||||||
from backend.extractors.streamed import StreamedExtractor
|
from backend.extractors.streamed import StreamedExtractor
|
||||||
|
from backend.extractors.timstreams import TimStreamsExtractor
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"ExtractedStream",
|
"ExtractedStream",
|
||||||
|
|
@ -36,10 +41,20 @@ def create_registry() -> ExtractorRegistry:
|
||||||
registry = ExtractorRegistry()
|
registry = ExtractorRegistry()
|
||||||
|
|
||||||
# --- Register extractors below ---
|
# --- Register extractors below ---
|
||||||
|
# CuratedExtractor returns hand-picked 24/7 channels first so we always
|
||||||
|
# have something. FallbackExtractor was removed — it surfaced aggregator
|
||||||
|
# landing pages that don't play directly in an iframe (they require
|
||||||
|
# user navigation through the page) and dominated the list with
|
||||||
|
# entries that fail browser-based playback verification.
|
||||||
|
registry.register(CuratedExtractor())
|
||||||
registry.register(DemoExtractor())
|
registry.register(DemoExtractor())
|
||||||
registry.register(StreamedExtractor())
|
registry.register(StreamedExtractor())
|
||||||
registry.register(DaddyLiveExtractor())
|
registry.register(DaddyLiveExtractor())
|
||||||
registry.register(AceztrimsExtractor())
|
registry.register(AceztrimsExtractor())
|
||||||
|
registry.register(PitsportExtractor())
|
||||||
|
registry.register(PPVExtractor())
|
||||||
|
registry.register(TimStreamsExtractor())
|
||||||
|
registry.register(DiscordExtractor())
|
||||||
|
|
||||||
return registry
|
return registry
|
||||||
|
|
||||||
|
|
|
||||||
61
stacks/f1-stream/files/backend/extractors/curated.py
Normal file
61
stacks/f1-stream/files/backend/extractors/curated.py
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
"""Curated extractor — known-good 24/7 F1 channels via direct embed URLs.
|
||||||
|
|
||||||
|
Returns a small, hand-picked list of embed URLs that are reliable enough to
|
||||||
|
be served as fallback "always-on" streams when the dynamic extractors find
|
||||||
|
nothing (e.g. between race weekends, when API providers are down).
|
||||||
|
|
||||||
|
These are direct embed URLs. The frontend routes them through /embed so the
|
||||||
|
iframe-stripping proxy bypasses any frame-buster JS in the upstream player.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from backend.extractors.base import BaseExtractor
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Curated list. Each entry is a known direct embed URL. These were sourced
# from the timstreams.py ALWAYS_INCLUDE_HASHES list (Sky Sports F1, DAZN F1)
# and are documented as 24/7 channels that play F1 content year-round.
#
# Every entry must provide the three keys read by CuratedExtractor.extract:
#   "url"     - used as both the stream URL and the embed URL
#   "title"   - display title
#   "quality" - quality label
_CURATED_STREAMS = [
    {
        "url": "https://hmembeds.one/embed/888520f36cd94c5da4c71fddc1a5fc9b",
        "title": "Sky Sports F1 (24/7)",
        "quality": "HD",
    },
    {
        "url": "https://hmembeds.one/embed/fc3a54634d0867b0c02ee3223292e7c6",
        "title": "DAZN F1 (24/7)",
        "quality": "HD",
    },
]
|
||||||
|
|
||||||
|
|
||||||
|
class CuratedExtractor(BaseExtractor):
    """Extractor that serves the static `_CURATED_STREAMS` list as embed streams."""

    @property
    def site_key(self) -> str:
        """Identifier of this extractor."""
        return "curated"

    @property
    def site_name(self) -> str:
        """Display name of this extractor."""
        return "Curated 24/7 Channels"

    async def extract(self) -> list[ExtractedStream]:
        """Build one embed-type ExtractedStream per curated entry."""
        curated: list[ExtractedStream] = []
        for item in _CURATED_STREAMS:
            curated.append(
                ExtractedStream(
                    url=item["url"],
                    site_key=self.site_key,
                    site_name=self.site_name,
                    quality=item["quality"],
                    title=item["title"],
                    stream_type="embed",
                    embed_url=item["url"],
                )
            )
        logger.info("[curated] Returning %d curated stream(s)", len(curated))
        return curated
|
||||||
203
stacks/f1-stream/files/backend/extractors/discord_source.py
Normal file
203
stacks/f1-stream/files/backend/extractors/discord_source.py
Normal file
|
|
@ -0,0 +1,203 @@
|
||||||
|
"""Discord extractor - monitors Discord channels for F1 stream links.
|
||||||
|
|
||||||
|
Reads recent messages from configured Discord channels using a user token,
|
||||||
|
extracts URLs that look like stream links, and returns them as embed streams.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from backend.extractors.base import BaseExtractor
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Base URL of the Discord REST API used to read channel messages.
DISCORD_API = "https://discord.com/api/v9"
# Sent verbatim as the Authorization header; extraction is skipped when empty.
DISCORD_TOKEN = os.getenv("DISCORD_TOKEN", "")
# Comma-separated channel IDs to monitor. When the variable is unset this is
# [""]; DiscordExtractor.extract strips and drops empty entries.
DISCORD_CHANNELS = os.getenv("DISCORD_CHANNELS", "").split(",")
# How many messages to fetch per channel
MESSAGE_LIMIT = 50

USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

# Matches any http(s) URL up to the next whitespace, angle bracket, closing
# parenthesis/bracket or quote. It does not filter by host or file type;
# that is done afterwards by `_is_stream_url`.
URL_PATTERN = re.compile(r"https?://[^\s<>\)\]\"']+", re.IGNORECASE)

# Domains that publish news/articles, not playable streams. Discord users share
# these links during race weekends; they are NOT streams and pollute the list.
EXCLUDED_DOMAINS = {
    "discord.com", "discord.gg", "cdn.discordapp.com",
    "tenor.com", "giphy.com", "imgur.com",
    "youtube.com", "youtu.be", "twitter.com", "x.com",
    "reddit.com", "instagram.com", "tiktok.com",
    "fmhy.net", "github.com", "freemotorsports.com",
    # News / official sites — never playable embeds
    "formula1.com", "fia.com", "skysports.com", "motorsport.com",
    "driverdb.com", "autosport.com", "the-race.com", "racefans.net",
    "wikipedia.org", "fantasy.formula1.com",
}

# A URL is treated as a candidate stream embed only if its path looks like
# a stream/embed/player route. This catches /embed/{id}, /stream/{id},
# /watch/{id}, /live/{slug}, /player/{...} and similar — and rejects
# /article/, /news/, /latest/, /join/, etc.
# Entries are lower-case and are searched for as substrings of the URL's
# path plus query string by `_is_stream_url`.
_PATH_KEYWORDS = (
    "embed/", "/stream", "/streams", "/watch", "/live",
    "/player", "/play/", "/sky", "/f1/", "/formula",
    "/grand-prix", "/gp/", "/channel", ".m3u8", ".php",
)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_stream_url(url: str) -> bool:
|
||||||
|
"""Heuristic: does this URL look like an actual stream/embed/player link?
|
||||||
|
|
||||||
|
Discord users share lots of news links during race weekends. The old
|
||||||
|
filter only blocked specific domains and let everything else through,
|
||||||
|
which produced a stream list dominated by formula1.com news articles.
|
||||||
|
The new filter is positive-match: a URL must contain at least one
|
||||||
|
stream-shaped path keyword to be included.
|
||||||
|
"""
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
domain = parsed.netloc.lower()
|
||||||
|
path = parsed.path.lower()
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not domain:
|
||||||
|
return False
|
||||||
|
|
||||||
|
for excluded in EXCLUDED_DOMAINS:
|
||||||
|
if excluded in domain:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if any(path.endswith(ext) for ext in (".png", ".jpg", ".jpeg", ".gif", ".webp", ".mp4", ".webm", ".svg", ".css", ".js")):
|
||||||
|
return False
|
||||||
|
|
||||||
|
full = path + ("?" + parsed.query if parsed.query else "")
|
||||||
|
if not any(kw in full for kw in _PATH_KEYWORDS):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class DiscordExtractor(BaseExtractor):
    """Collects candidate stream links from Discord channel messages.

    Reads recent messages from every configured channel, pulls URLs out of
    the message text and embeds, keeps the ones `_is_stream_url` accepts,
    and reports them as embed-type streams.
    """

    @property
    def site_key(self) -> str:
        """Identifier of this extractor."""
        return "discord"

    @property
    def site_name(self) -> str:
        """Display name of this extractor."""
        return "Discord Community"

    async def extract(self) -> list[ExtractedStream]:
        """Fetch recent messages from Discord channels and extract URLs.

        Returns an empty list when no token or no channel is configured.
        A failure on one channel is logged and does not stop the others.
        """
        if not DISCORD_TOKEN:
            logger.info("[discord] No DISCORD_TOKEN set, skipping")
            return []

        channel_ids = []
        for raw_id in DISCORD_CHANNELS:
            cleaned_id = raw_id.strip()
            if cleaned_id:
                channel_ids.append(cleaned_id)
        if not channel_ids:
            logger.info("[discord] No DISCORD_CHANNELS configured, skipping")
            return []

        collected: list[ExtractedStream] = []
        # Shared across channels so the same URL is reported only once.
        already_seen: set[str] = set()
        request_headers = {
            "Authorization": DISCORD_TOKEN,
            "User-Agent": USER_AGENT,
        }

        try:
            async with httpx.AsyncClient(
                timeout=15.0,
                follow_redirects=True,
                headers=request_headers,
            ) as client:
                for channel_id in channel_ids:
                    try:
                        found = await self._fetch_channel(client, channel_id, already_seen)
                    except Exception:
                        logger.debug(
                            "[discord] Failed to fetch channel %s",
                            channel_id,
                            exc_info=True,
                        )
                    else:
                        collected.extend(found)
        except Exception:
            logger.exception("[discord] Failed to connect to Discord API")

        logger.info(
            "[discord] Extracted %d stream(s) from %d channel(s)",
            len(collected),
            len(channel_ids),
        )
        return collected

    async def _fetch_channel(
        self,
        client: httpx.AsyncClient,
        channel_id: str,
        seen_urls: set[str],
    ) -> list[ExtractedStream]:
        """Fetch messages from a single channel and extract stream URLs.

        `seen_urls` is updated in place with every URL that is accepted.
        Returns an empty list on a non-200 response or a non-list payload.
        """
        endpoint = f"{DISCORD_API}/channels/{channel_id}/messages"
        resp = await client.get(endpoint, params={"limit": MESSAGE_LIMIT})
        if resp.status_code != 200:
            logger.warning(
                "[discord] Channel %s returned HTTP %d", channel_id, resp.status_code
            )
            return []

        payload = resp.json()
        if not isinstance(payload, list):
            return []

        found: list[ExtractedStream] = []
        for message in payload:
            text = message.get("content", "")
            username = message.get("author", {}).get("username", "unknown")

            # URLs from the message text first, then from attached embeds.
            candidates = URL_PATTERN.findall(text)
            candidates.extend(
                attached["url"]
                for attached in message.get("embeds", [])
                if attached.get("url")
            )

            for candidate in candidates:
                # Clean trailing punctuation
                cleaned = candidate.rstrip(".,;:!?)")
                if cleaned in seen_urls or not _is_stream_url(cleaned):
                    continue

                seen_urls.add(cleaned)
                found.append(
                    ExtractedStream(
                        url=cleaned,
                        site_key=self.site_key,
                        site_name=self.site_name,
                        quality="",
                        title=f"Shared by {username}",
                        stream_type="embed",
                        embed_url=cleaned,
                    )
                )

        return found
|
||||||
510
stacks/f1-stream/files/backend/extractors/pitsport.py
Normal file
510
stacks/f1-stream/files/backend/extractors/pitsport.py
Normal file
|
|
@ -0,0 +1,510 @@
|
||||||
|
"""Pitsport.xyz extractor - fetches F1 streams from the Next.js RSC payload.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
- Main page (pitsport.xyz) has a "Live Now" section with event cards containing
|
||||||
|
category, title, time, imageUrl props and /watch/{UUID} links.
|
||||||
|
- Schedule page (pitsport.xyz/schedule) lists all events grouped by category
|
||||||
|
(h2 headings) with /watch/{UUID} links and event titles.
|
||||||
|
- Watch pages (/watch/{UUID}) embed iframes from pushembdz.store/embed/{EMBED_UUID}.
|
||||||
|
- Embed pages contain an RSC payload with a stream config: {title, link, method}.
|
||||||
|
- When method is "player" or "hls", the link field points to a serveplay.site
|
||||||
|
m3u8 playlist. Otherwise we return the embed URL for iframe playback.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from backend.extractors.base import BaseExtractor
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Origins used to build watch-page and embed-page URLs.
PITSPORT_BASE = "https://pitsport.xyz"
EMBED_BASE = "https://pushembdz.store"

# Desktop Chrome user agent string.
USER_AGENT = " ".join(
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/120.0.0.0 Safari/537.36",
    )
)

# Categories to include (compared after strip + lowercase).
F1_CATEGORIES = {
    "formula 1",
    "formula 2",
    "formula 3",
}

# Fallback keyword matching on combined category+title for edge cases.
F1_KEYWORDS = {
    "formula 1",
    "formula one",
    "f1",
}
GP_KEYWORD = "grand prix"

# Any of these substrings in category+title vetoes the keyword fallback.
NON_F1_KEYWORDS = {
    # two-wheel series
    "motogp", "moto gp", "moto2", "moto3", "motoe", "superbike",
    "ama supercross", "supercross",
    # US series
    "indycar", "indy car", "firestone", "nascar", "arca", "xfinity", "trucks",
    # rally / endurance / touring / other single-seaters
    "rally", "wrc", "wec", "lemans", "le mans", "dtm", "supercars",
    "super formula", "supergt", "super gt",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class _PitsportEvent:
    """An event discovered from the Pitsport site."""

    # Category text captured alongside the event (e.g. a schedule heading).
    category: str
    # Event title text captured from the page or RSC payload.
    title: str
    # 36-character identifier taken from the event's /watch/{UUID} link.
    watch_uuid: str
|
||||||
|
|
||||||
|
|
||||||
|
def _is_f1_category(category: str) -> bool:
|
||||||
|
"""Check if a category string matches an F1-related series."""
|
||||||
|
return category.strip().lower() in F1_CATEGORIES
|
||||||
|
|
||||||
|
|
||||||
|
def _is_f1_event(category: str, title: str) -> bool:
|
||||||
|
"""Check if an event is Formula 1 related by category or title keywords."""
|
||||||
|
# Primary check: exact category match
|
||||||
|
if _is_f1_category(category):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Secondary check: keyword matching on combined text
|
||||||
|
lower = f"{category} {title}".lower()
|
||||||
|
if any(kw in lower for kw in NON_F1_KEYWORDS):
|
||||||
|
return False
|
||||||
|
if any(kw in lower for kw in F1_KEYWORDS):
|
||||||
|
return True
|
||||||
|
if GP_KEYWORD in lower:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_live_events(html: str) -> list[_PitsportEvent]:
|
||||||
|
"""Parse live events from the main page RSC payload.
|
||||||
|
|
||||||
|
The main page contains event cards with props:
|
||||||
|
category, title, time, imageUrl
|
||||||
|
wrapped in <a href="/watch/{UUID}"> links.
|
||||||
|
"""
|
||||||
|
events: list[_PitsportEvent] = []
|
||||||
|
|
||||||
|
# Match event cards in the RSC payload - they appear as JSON-like structures
|
||||||
|
# Pattern: href="/watch/UUID" ... category":"...", "title":"..."
|
||||||
|
# In the RSC payload, the data is in the format:
|
||||||
|
# ["$","$L2","/watch/UUID",{"href":"/watch/UUID","children":["$","$L10",null,
|
||||||
|
# {"category":"...","title":"...","time":...,"imageUrl":"..."}]}]
|
||||||
|
pattern = re.compile(
|
||||||
|
r'"href":"(/watch/([0-9a-f-]{36}))"[^}]*?"category":"([^"]+)","title":"([^"]+)"',
|
||||||
|
)
|
||||||
|
for match in pattern.finditer(html):
|
||||||
|
_, uuid, category, title = match.groups()
|
||||||
|
events.append(_PitsportEvent(category=category, title=title, watch_uuid=uuid))
|
||||||
|
|
||||||
|
return events
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_schedule_events(html: str) -> list[_PitsportEvent]:
|
||||||
|
"""Parse events from the schedule page.
|
||||||
|
|
||||||
|
The schedule page groups events under category headers (h2 elements).
|
||||||
|
In the rendered HTML:
|
||||||
|
<h2 ...>Formula 1</h2>
|
||||||
|
<div ...>
|
||||||
|
<a href="/watch/UUID">...</a>
|
||||||
|
...
|
||||||
|
</div>
|
||||||
|
|
||||||
|
In the RSC payload, similar structure with section divs containing
|
||||||
|
a category h2 and child event links with titles.
|
||||||
|
"""
|
||||||
|
events: list[_PitsportEvent] = []
|
||||||
|
|
||||||
|
# Strategy 1: Parse from rendered HTML
|
||||||
|
# Find category sections: >CategoryName</h2> followed by watch links
|
||||||
|
# Split HTML at each category header
|
||||||
|
section_pattern = re.compile(
|
||||||
|
r'>([^<]+)</h2>\s*<div[^>]*class="flex flex-wrap gap-6">(.*?)(?=</div>\s*</div>\s*(?:<div|</div>|$))',
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
|
for section_match in section_pattern.finditer(html):
|
||||||
|
category = section_match.group(1).strip()
|
||||||
|
section_html = section_match.group(2)
|
||||||
|
|
||||||
|
# Find all watch links in this section
|
||||||
|
link_pattern = re.compile(
|
||||||
|
r'href="/watch/([0-9a-f-]{36})".*?<h1[^>]*>([^<]+)</h1>',
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
|
for link_match in link_pattern.finditer(section_html):
|
||||||
|
uuid = link_match.group(1)
|
||||||
|
title = link_match.group(2).strip()
|
||||||
|
events.append(
|
||||||
|
_PitsportEvent(category=category, title=title, watch_uuid=uuid)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Strategy 2: Parse from RSC payload if rendered HTML didn't yield results
|
||||||
|
# The RSC payload has patterns like:
|
||||||
|
# "children":"Formula 1"}] ... "/watch/UUID" ... "title":"EventTitle"
|
||||||
|
if not events:
|
||||||
|
events = _parse_schedule_rsc(html)
|
||||||
|
|
||||||
|
return events
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_schedule_rsc(html: str) -> list[_PitsportEvent]:
|
||||||
|
"""Parse events from schedule page RSC payload as fallback.
|
||||||
|
|
||||||
|
Extracts category section divs from the RSC JSON structure.
|
||||||
|
"""
|
||||||
|
events: list[_PitsportEvent] = []
|
||||||
|
|
||||||
|
# Find the RSC payload chunks
|
||||||
|
rsc_chunks = re.findall(
|
||||||
|
r'self\.__next_f\.push\(\[1,"(.*?)"\]\)', html, re.DOTALL
|
||||||
|
)
|
||||||
|
if not rsc_chunks:
|
||||||
|
return events
|
||||||
|
|
||||||
|
# Concatenate and unescape
|
||||||
|
full_payload = ""
|
||||||
|
for chunk in rsc_chunks:
|
||||||
|
try:
|
||||||
|
full_payload += chunk.encode().decode("unicode_escape")
|
||||||
|
except Exception:
|
||||||
|
full_payload += chunk
|
||||||
|
|
||||||
|
# Find category sections in the RSC data
|
||||||
|
# Pattern: "children":"CategoryName"}],["$","div",...watch links...
|
||||||
|
# Each section div contains an h2 with the category name and watch links
|
||||||
|
cat_pattern = re.compile(
|
||||||
|
r'border-gray-700 pb-2","children":"([^"]+)"\}.*?'
|
||||||
|
r'(?=border-gray-700 pb-2","children"|$)',
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
|
for cat_match in cat_pattern.finditer(full_payload):
|
||||||
|
category = cat_match.group(1)
|
||||||
|
section_text = cat_match.group(0)
|
||||||
|
|
||||||
|
# Find watch UUIDs and titles in this section
|
||||||
|
# Pattern: "/watch/UUID" ... "title":"EventTitle"
|
||||||
|
event_pattern = re.compile(
|
||||||
|
r'/watch/([0-9a-f-]{36}).*?"title":"([^"]+)"',
|
||||||
|
)
|
||||||
|
for ev_match in event_pattern.finditer(section_text):
|
||||||
|
uuid = ev_match.group(1)
|
||||||
|
title = ev_match.group(2)
|
||||||
|
events.append(
|
||||||
|
_PitsportEvent(category=category, title=title, watch_uuid=uuid)
|
||||||
|
)
|
||||||
|
|
||||||
|
return events
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_embed_uuids(html: str) -> list[str]:
|
||||||
|
"""Extract embed UUIDs from a watch page.
|
||||||
|
|
||||||
|
Watch pages contain iframes like:
|
||||||
|
<iframe src="https://pushembdz.store/embed/{EMBED_UUID}" ...>
|
||||||
|
|
||||||
|
And in the RSC payload:
|
||||||
|
"iframe":"https://pushembdz.store/embed/{EMBED_UUID}"
|
||||||
|
"""
|
||||||
|
uuids: list[str] = []
|
||||||
|
|
||||||
|
# From rendered HTML
|
||||||
|
iframe_pattern = re.compile(
|
||||||
|
r'pushembdz\.store/embed/([0-9a-f-]{36})',
|
||||||
|
)
|
||||||
|
for match in iframe_pattern.finditer(html):
|
||||||
|
uuid = match.group(1)
|
||||||
|
if uuid not in uuids:
|
||||||
|
uuids.append(uuid)
|
||||||
|
|
||||||
|
return uuids
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class _StreamConfig:
    """Stream configuration extracted from an embed page."""

    # Value of the "title" field in the embed page's "stream" object.
    title: str
    # Value of the "link" field in the same object.
    link: str
    # Value of the "method" field in the same object (e.g. "player").
    method: str
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_stream_config(html: str) -> _StreamConfig | None:
|
||||||
|
"""Extract stream config from an embed page RSC payload.
|
||||||
|
|
||||||
|
The embed page contains an RSC payload line like:
|
||||||
|
4:["$","$Ld",null,{"stream":{"title":"...","link":"...","method":"player"},
|
||||||
|
"error":null,"slug":"..."}]
|
||||||
|
"""
|
||||||
|
# Try matching the escaped RSC payload pattern
|
||||||
|
pattern = re.compile(
|
||||||
|
r'"stream":\{["\']?\\?"title\\?"["\']?:["\']?\\?"([^"\\]+)\\?"["\']?,'
|
||||||
|
r'["\']?\\?"link\\?"["\']?:["\']?\\?"([^"\\]+)\\?"["\']?,'
|
||||||
|
r'["\']?\\?"method\\?"["\']?:["\']?\\?"([^"\\]+)\\?"',
|
||||||
|
)
|
||||||
|
match = pattern.search(html)
|
||||||
|
if match:
|
||||||
|
return _StreamConfig(
|
||||||
|
title=match.group(1),
|
||||||
|
link=match.group(2),
|
||||||
|
method=match.group(3),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Simpler pattern for double-escaped payload
|
||||||
|
pattern2 = re.compile(
|
||||||
|
r'\\?"stream\\?":\{\\?"title\\?":\\?"([^\\]+)\\?",'
|
||||||
|
r'\\?"link\\?":\\?"([^\\]+)\\?",'
|
||||||
|
r'\\?"method\\?":\\?"([^\\]+)\\?"',
|
||||||
|
)
|
||||||
|
match = pattern2.search(html)
|
||||||
|
if match:
|
||||||
|
return _StreamConfig(
|
||||||
|
title=match.group(1),
|
||||||
|
link=match.group(2),
|
||||||
|
method=match.group(3),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Most lenient: just find the three fields near each other
|
||||||
|
pattern3 = re.compile(
|
||||||
|
r'"stream"\s*:\s*\{\s*"title"\s*:\s*"([^"]+)"\s*,'
|
||||||
|
r'\s*"link"\s*:\s*"([^"]+)"\s*,'
|
||||||
|
r'\s*"method"\s*:\s*"([^"]+)"',
|
||||||
|
)
|
||||||
|
match = pattern3.search(html)
|
||||||
|
if match:
|
||||||
|
return _StreamConfig(
|
||||||
|
title=match.group(1),
|
||||||
|
link=match.group(2),
|
||||||
|
method=match.group(3),
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _is_m3u8_method(method: str) -> bool:
|
||||||
|
"""Check if the stream method indicates a direct HLS stream."""
|
||||||
|
return method.lower() in ("player", "hls")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_m3u8_url(link: str) -> str:
    """Return the m3u8 playlist URL for a serveplay.site player URL.

    This is currently an identity function: the link is returned unchanged.

    Input:  https://dash.serveplay.site/{channel}/index.html
    Output: https://dash.serveplay.site/{channel}/index.html

    Per the original author's note, the index.html IS the m3u8 playlist
    (served with proper content-type when fetched with the correct Referer
    header), so no rewriting is needed. NOTE(review): that server behaviour
    is not visible from this file; confirm before relying on it.
    """
    return link
|
||||||
|
|
||||||
|
|
||||||
|
class PitsportExtractor(BaseExtractor):
    """Extractor for F1 streams listed on Pitsport.xyz.

    Events are discovered from the main page and the schedule page. Each
    event's watch page is then fetched to find embed UUIDs, and each embed
    page is parsed for its stream config. The result is either a direct
    m3u8 stream or an iframe embed entry.
    """

    @property
    def site_key(self) -> str:
        return "pitsport"

    @property
    def site_name(self) -> str:
        return "Pitsport"

    async def extract(self) -> list[ExtractedStream]:
        """Discover F1 events and resolve them to stream or embed URLs.

        Any exception is logged and whatever was collected so far is
        returned; this method does not raise.
        """
        collected: list[ExtractedStream] = []

        try:
            async with httpx.AsyncClient(
                headers={"User-Agent": USER_AGENT},
                follow_redirects=True,
                timeout=20.0,
            ) as client:
                events = await self._discover_events(client)
                logger.info(
                    "[pitsport] Found %d F1 event(s) to process", len(events)
                )

                # The same event can appear on both pages; keep the first
                # occurrence of each watch UUID, preserving order.
                first_by_uuid: dict[str, _PitsportEvent] = {}
                for candidate in events:
                    first_by_uuid.setdefault(candidate.watch_uuid, candidate)

                for event in first_by_uuid.values():
                    collected.extend(
                        await self._resolve_event_streams(client, event)
                    )
        except Exception:
            logger.exception("[pitsport] Failed to extract streams")

        logger.info("[pitsport] Extracted %d stream(s)", len(collected))
        return collected

    async def _discover_events(
        self, client: httpx.AsyncClient
    ) -> list[_PitsportEvent]:
        """Gather F1 events from the main page, then the schedule page.

        The two fetches are independent: a failure on one is logged and the
        other is still attempted. Duplicates are not removed here.
        """
        discovered: list[_PitsportEvent] = []

        # Main page: events that are live right now.
        try:
            home = await client.get(PITSPORT_BASE)
            if home.status_code != 200:
                logger.warning(
                    "[pitsport] Main page returned HTTP %d", home.status_code
                )
            else:
                live_events = _parse_live_events(home.text)
                logger.info(
                    "[pitsport] Main page: %d live event(s)", len(live_events)
                )
                discovered.extend(
                    ev for ev in live_events if _is_f1_event(ev.category, ev.title)
                )
        except Exception:
            logger.exception("[pitsport] Failed to fetch main page")

        # Schedule page: upcoming events.
        try:
            schedule = await client.get(f"{PITSPORT_BASE}/schedule")
            if schedule.status_code != 200:
                logger.warning(
                    "[pitsport] Schedule page returned HTTP %d",
                    schedule.status_code,
                )
            else:
                schedule_events = _parse_schedule_events(schedule.text)
                logger.info(
                    "[pitsport] Schedule page: %d total event(s)",
                    len(schedule_events),
                )
                discovered.extend(
                    ev
                    for ev in schedule_events
                    if _is_f1_event(ev.category, ev.title)
                )
        except Exception:
            logger.exception("[pitsport] Failed to fetch schedule page")

        return discovered

    async def _resolve_event_streams(
        self, client: httpx.AsyncClient, event: _PitsportEvent
    ) -> list[ExtractedStream]:
        """Turn one event's watch page into zero or more streams.

        Returns an empty list when the watch page is not HTTP 200 or has no
        embeds. Unexpected errors are logged at debug level and whatever was
        resolved before the error is returned.
        """
        resolved: list[ExtractedStream] = []

        try:
            page = await client.get(f"{PITSPORT_BASE}/watch/{event.watch_uuid}")
            if page.status_code != 200:
                logger.debug(
                    "[pitsport] Watch page %s returned HTTP %d",
                    event.watch_uuid,
                    page.status_code,
                )
                return []

            embed_ids = _parse_embed_uuids(page.text)
            if not embed_ids:
                logger.debug(
                    "[pitsport] No embed UUIDs found for %s", event.watch_uuid
                )
                return []

            logger.debug(
                "[pitsport] Event '%s' has %d embed(s)",
                event.title,
                len(embed_ids),
            )

            # stream_num is 1-based so titles read "#2", "#3", ...
            for position, embed_id in enumerate(embed_ids, start=1):
                candidate = await self._resolve_embed(
                    client, embed_id, event, stream_num=position
                )
                if candidate:
                    resolved.append(candidate)
        except Exception:
            logger.debug(
                "[pitsport] Failed to resolve event %s",
                event.watch_uuid,
                exc_info=True,
            )

        return resolved

    async def _resolve_embed(
        self,
        client: httpx.AsyncClient,
        embed_uuid: str,
        event: _PitsportEvent,
        stream_num: int,
    ) -> ExtractedStream | None:
        """Fetch one embed page and build the stream entry for it.

        Returns None when the page is not HTTP 200, no stream config can be
        parsed, or an unexpected error occurs (logged at debug level).
        """
        try:
            embed_url = f"{EMBED_BASE}/embed/{embed_uuid}"
            resp = await client.get(embed_url)
            if resp.status_code != 200:
                logger.debug(
                    "[pitsport] Embed page %s returned HTTP %d",
                    embed_uuid,
                    resp.status_code,
                )
                return None

            config = _parse_stream_config(resp.text)
            if not config:
                logger.debug(
                    "[pitsport] No stream config found in embed %s",
                    embed_uuid,
                )
                return None

            # Title: "<category> - <event> (<source title>) #<n>"
            label = f"{event.category} - {event.title}"
            if config.title:
                label += f" ({config.title})"
            if stream_num > 1:
                label += f" #{stream_num}"

            direct_hls = (
                _is_m3u8_method(config.method) and "serveplay.site" in config.link
            )
            if not direct_hls:
                # Iframe embed fallback.
                return ExtractedStream(
                    url=embed_url,
                    site_key=self.site_key,
                    site_name=self.site_name,
                    quality="",
                    title=label,
                    stream_type="embed",
                    embed_url=embed_url,
                )

            # Direct m3u8 stream.
            return ExtractedStream(
                url=_extract_m3u8_url(config.link),
                site_key=self.site_key,
                site_name=self.site_name,
                quality="",
                title=label,
                stream_type="m3u8",
            )
        except Exception:
            logger.debug(
                "[pitsport] Failed to resolve embed %s",
                embed_uuid,
                exc_info=True,
            )
            return None
|
||||||
270
stacks/f1-stream/files/backend/extractors/ppv.py
Normal file
270
stacks/f1-stream/files/backend/extractors/ppv.py
Normal file
|
|
@ -0,0 +1,270 @@
|
||||||
|
"""PPV.to extractor - fetches F1 streams via the public PPV API.
|
||||||
|
|
||||||
|
Returns embed URLs (pooembed.eu) for iframe playback.
|
||||||
|
The API at api.ppv.to/api/streams requires no authentication.
|
||||||
|
Falls back to api.ppv.st if the primary API is unreachable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from backend.extractors.base import BaseExtractor
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Stream listing endpoints; the fallback is only tried when the primary fails.
PRIMARY_API = "https://api.ppv.to/api/streams"
FALLBACK_API = "https://api.ppv.st/api/streams"

# Prefix for embed URLs constructed from a stream's uri_name or id.
EMBED_BASE = "https://pooembed.eu/embed"

# Desktop Chrome user agent string.
USER_AGENT = " ".join(
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/120.0.0.0 Safari/537.36",
    )
)

# Category name for motorsport on PPV.to
MOTORSPORT_CATEGORY = "motorsports"

# Only include events matching these keywords (case-insensitive)
F1_KEYWORDS = {
    "formula 1",
    "formula one",
    "f1",
    "sky sports f1",
}

# Grand Prix is shared with MotoGP/IndyCar — only match if no other series keywords
GP_KEYWORD = "grand prix"
NON_F1_KEYWORDS = {
    # two-wheel series
    "motogp", "moto gp", "moto2", "moto3", "motoe", "superbike",
    # US series
    "indycar", "indy car", "firestone", "nascar",
    # rally / endurance / touring
    "rally", "wrc", "wec", "lemans", "le mans", "dtm", "supercars",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _is_f1_stream(name: str, category_name: str = "") -> bool:
|
||||||
|
"""Check if a stream is Formula 1 related.
|
||||||
|
|
||||||
|
Checks both the stream name and the category name.
|
||||||
|
A stream qualifies if:
|
||||||
|
- It is in the motorsport category AND matches F1 keywords, OR
|
||||||
|
- It matches F1 keywords regardless of category.
|
||||||
|
"""
|
||||||
|
lower_name = name.lower()
|
||||||
|
lower_cat = category_name.lower()
|
||||||
|
|
||||||
|
# Reject if it contains non-F1 motorsport keywords
|
||||||
|
if any(kw in lower_name for kw in NON_F1_KEYWORDS):
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Direct F1 keyword match in the stream name
|
||||||
|
if any(kw in lower_name for kw in F1_KEYWORDS):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# "grand prix" in the name, only if in motorsports category and no non-F1 keywords
|
||||||
|
if GP_KEYWORD in lower_name and MOTORSPORT_CATEGORY in lower_cat:
|
||||||
|
return True
|
||||||
|
|
||||||
|
# If the category is motorsport, also check category-level keywords
|
||||||
|
if MOTORSPORT_CATEGORY in lower_cat and any(kw in lower_cat for kw in F1_KEYWORDS):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class PPVExtractor(BaseExtractor):
    """Extracts embed URLs from PPV.to's public JSON API.

    Uses the endpoint:
        - GET https://api.ppv.to/api/streams -> all streams grouped by category
        - Fallback: https://api.ppv.st/api/streams

    Each stream object contains an `iframe` field with the embed URL,
    or a `uri_name` from which the embed URL can be constructed.
    """

    @property
    def site_key(self) -> str:
        return "ppv"

    @property
    def site_name(self) -> str:
        return "PPV.to"

    async def _fetch_streams(self, client: httpx.AsyncClient) -> dict | None:
        """Try the primary API, then the fallback; return parsed JSON or None.

        A non-200 response is logged as a warning. A network error or
        JSON-decoding error is logged at debug level. Either way the next
        endpoint is tried.
        """
        for api_url in (PRIMARY_API, FALLBACK_API):
            try:
                resp = await client.get(api_url)
                if resp.status_code == 200:
                    data = resp.json()
                    logger.info("[ppv] Fetched streams from %s", api_url)
                    return data
                logger.warning(
                    "[ppv] %s returned HTTP %d", api_url, resp.status_code
                )
            except Exception:
                logger.debug(
                    "[ppv] Failed to reach %s", api_url, exc_info=True
                )
        return None

    async def extract(self) -> list[ExtractedStream]:
        """Fetch F1 streams and return embed URLs for iframe playback.

        Returns an empty list when neither API is reachable. Any unexpected
        error is logged and the streams collected so far are returned; this
        method does not raise.
        """
        streams: list[ExtractedStream] = []

        try:
            async with httpx.AsyncClient(
                timeout=15.0,
                follow_redirects=True,
                headers={"User-Agent": USER_AGENT, "Accept": "application/json"},
            ) as client:
                data = await self._fetch_streams(client)
                if data is None:
                    logger.warning("[ppv] Could not fetch streams from any API")
                    return []

                # The API returns:
                # { "streams": [ { "category": "Name", "id": N, "streams": [...] }, ... ] }
                # Flatten into (category_name, stream_obj) tuples.
                all_streams = self._normalize_streams(data)

                logger.info(
                    "[ppv] Found %d total stream(s) across all categories",
                    len(all_streams),
                )

                for category_name, stream_obj in all_streams:
                    name = stream_obj.get("name", "") or stream_obj.get("title", "")

                    if not _is_f1_stream(name, category_name):
                        continue

                    embed_url = self._get_embed_url(stream_obj)
                    if not embed_url:
                        logger.debug("[ppv] No embed URL for stream: %s", name)
                        continue

                    streams.extend(
                        self._build_entries(stream_obj, name, embed_url)
                    )

        except Exception:
            logger.exception("[ppv] Failed to extract streams")

        logger.info("[ppv] Extracted %d F1 stream(s)", len(streams))
        return streams

    def _build_entries(
        self, stream_obj: dict, name: str, embed_url: str
    ) -> list[ExtractedStream]:
        """Build the ExtractedStream entries for one API stream object.

        Emits one entry per usable substream (multiple quality/language
        options), or a single entry for the stream itself when it has no
        usable substreams.
        """
        quality = stream_obj.get("tag", "") or ""

        entries: list[ExtractedStream] = []
        substreams = stream_obj.get("substreams")
        if isinstance(substreams, list):
            for i, sub in enumerate(substreams):
                if not isinstance(sub, dict):
                    # Skip malformed entries instead of raising, which would
                    # abort processing of every remaining stream.
                    continue
                # Fall back to the parent embed URL when the substream has none.
                sub_embed = (
                    sub.get("iframe", "") or sub.get("embed_url", "") or embed_url
                )
                sub_name = sub.get("name", "") or sub.get("label", "")
                sub_quality = sub.get("tag", "") or sub.get("quality", "") or quality
                sub_title = f"{name}"
                if sub_name:
                    sub_title += f" - {sub_name}"
                elif i > 0:
                    sub_title += f" #{i + 1}"

                entries.append(
                    ExtractedStream(
                        url=sub_embed,
                        site_key=self.site_key,
                        site_name=self.site_name,
                        quality=sub_quality,
                        title=sub_title,
                        stream_type="embed",
                        embed_url=sub_embed,
                    )
                )
        if entries:
            return entries

        # Single stream, no (usable) substreams.
        title = name
        viewers = stream_obj.get("viewers")
        if self._viewer_count(viewers) > 0:
            title += f" ({viewers} viewers)"

        return [
            ExtractedStream(
                url=embed_url,
                site_key=self.site_key,
                site_name=self.site_name,
                quality=quality,
                title=title,
                stream_type="embed",
                embed_url=embed_url,
            )
        ]

    @staticmethod
    def _viewer_count(raw) -> int:
        """Convert the API's `viewers` value to an int; 0 when missing or non-numeric."""
        try:
            return int(raw)
        except (TypeError, ValueError, OverflowError):
            return 0

    @staticmethod
    def _normalize_streams(data: dict | list) -> list[tuple[str, dict]]:
        """Normalize the API response into a flat list of (category_name, stream_dict) tuples.

        The PPV API returns data in this shape:
            {
              "streams": [
                {
                  "category": "Motorsports",
                  "id": 35,
                  "streams": [ { stream objects... } ]
                },
                ...
              ]
            }

        Each category group has a "category" string and a nested "streams"
        list. A flat list whose items are themselves streams (they have a
        "name" but no nested "streams" list) is also accepted; those items
        are returned with whatever "category" value they carry (or "").
        """
        result: list[tuple[str, dict]] = []

        # Handle the top-level wrapper
        if isinstance(data, dict):
            categories = data.get("streams", [])
        elif isinstance(data, list):
            categories = data
        else:
            return result

        if not isinstance(categories, list):
            # e.g. {"streams": null}: nothing to iterate.
            return result

        for category_group in categories:
            if not isinstance(category_group, dict):
                continue

            category_name = category_group.get("category") or ""

            # No default here: defaulting to [] made every group look like it
            # had a nested list, so the flat-format branch below never ran.
            inner_streams = category_group.get("streams")
            if isinstance(inner_streams, list):
                result.extend(
                    (category_name, stream_obj)
                    for stream_obj in inner_streams
                    if isinstance(stream_obj, dict)
                )
            elif "name" in category_group:
                # Fallback: the item itself is a stream (flat list format)
                result.append((category_name, category_group))

        return result

    @staticmethod
    def _get_embed_url(stream: dict) -> str:
        """Extract or construct the embed URL for a stream.

        Preference order: the `iframe` field, then EMBED_BASE + `uri_name`
        (or `uri`), then EMBED_BASE + `id`. Returns "" when none is present.
        """
        # Prefer the iframe field directly
        iframe = stream.get("iframe", "")
        if iframe:
            return iframe

        # Construct from uri_name
        uri_name = stream.get("uri_name", "") or stream.get("uri", "")
        if uri_name:
            # Strip leading slash if present
            uri_name = uri_name.lstrip("/")
            return f"{EMBED_BASE}/{uri_name}"

        # Last resort: use the stream id
        stream_id = stream.get("id")
        if stream_id:
            return f"{EMBED_BASE}/{stream_id}"

        return ""
|
||||||
|
|
@ -6,6 +6,7 @@ from datetime import datetime, timezone
|
||||||
from backend.extractors.models import ExtractedStream
|
from backend.extractors.models import ExtractedStream
|
||||||
from backend.extractors.registry import ExtractorRegistry
|
from backend.extractors.registry import ExtractorRegistry
|
||||||
from backend.health import StreamHealthChecker
|
from backend.health import StreamHealthChecker
|
||||||
|
from backend.playback_verifier import PlaybackVerifier
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -29,6 +30,11 @@ class ExtractionService:
|
||||||
self._last_run: str | None = None
|
self._last_run: str | None = None
|
||||||
self._last_run_stream_count: int = 0
|
self._last_run_stream_count: int = 0
|
||||||
self._health_checker = StreamHealthChecker()
|
self._health_checker = StreamHealthChecker()
|
||||||
|
self._playback_verifier = PlaybackVerifier()
|
||||||
|
|
||||||
|
async def shutdown(self) -> None:
|
||||||
|
"""Release the headless browser instance owned by the verifier."""
|
||||||
|
await self._playback_verifier.shutdown()
|
||||||
|
|
||||||
async def run_extraction(self) -> None:
|
async def run_extraction(self) -> None:
|
||||||
"""Run all extractors, health-check results, and cache them.
|
"""Run all extractors, health-check results, and cache them.
|
||||||
|
|
@ -43,31 +49,63 @@ class ExtractionService:
|
||||||
|
|
||||||
streams = await self._registry.extract_all()
|
streams = await self._registry.extract_all()
|
||||||
|
|
||||||
# Run health checks on all extracted streams
|
# Run health checks + headless-browser playback verification.
|
||||||
|
# Both stream types are now verified end-to-end so the user only
|
||||||
|
# ever sees streams that actually play in a browser.
|
||||||
if streams:
|
if streams:
|
||||||
# Separate m3u8 streams (need health check) from embed streams (skip)
|
|
||||||
m3u8_streams = [s for s in streams if s.stream_type != "embed"]
|
m3u8_streams = [s for s in streams if s.stream_type != "embed"]
|
||||||
embed_streams = [s for s in streams if s.stream_type == "embed"]
|
embed_streams = [s for s in streams if s.stream_type == "embed"]
|
||||||
|
|
||||||
# Mark embed streams as live (no health check possible for iframes)
|
# m3u8 streams: cheap structural health check (validates manifest,
|
||||||
for stream in embed_streams:
|
# checks first variant playlist), then a headless-browser test
|
||||||
stream.is_live = True
|
# to confirm hls.js can decode and render frames.
|
||||||
stream.response_time_ms = 0
|
|
||||||
stream.checked_at = start.isoformat()
|
|
||||||
|
|
||||||
# Health-check only m3u8 streams
|
|
||||||
if m3u8_streams:
|
if m3u8_streams:
|
||||||
stream_dicts = [s.to_dict() for s in m3u8_streams]
|
stream_dicts = [s.to_dict() for s in m3u8_streams]
|
||||||
health_map = await self._health_checker.check_all(stream_dicts)
|
health_map = await self._health_checker.check_all(stream_dicts)
|
||||||
|
|
||||||
for stream in m3u8_streams:
|
for stream in m3u8_streams:
|
||||||
health = health_map.get(stream.url)
|
health = health_map.get(stream.url)
|
||||||
if health:
|
if health:
|
||||||
stream.is_live = health.is_live
|
|
||||||
stream.response_time_ms = health.response_time_ms
|
stream.response_time_ms = health.response_time_ms
|
||||||
stream.checked_at = health.checked_at
|
stream.checked_at = health.checked_at
|
||||||
if health.bitrate > 0:
|
if health.bitrate > 0:
|
||||||
stream.bitrate = health.bitrate
|
stream.bitrate = health.bitrate
|
||||||
|
# tentatively mark live; final word comes from the verifier
|
||||||
|
stream.is_live = health.is_live
|
||||||
|
|
||||||
|
# Browser verification: applies to both m3u8 (only those that
|
||||||
|
# passed structural health) and embed (always — they have no
|
||||||
|
# other way to verify).
|
||||||
|
verify_items: list[tuple[str, str]] = []
|
||||||
|
for stream in m3u8_streams:
|
||||||
|
if stream.is_live:
|
||||||
|
verify_items.append((stream.url, "m3u8"))
|
||||||
|
for stream in embed_streams:
|
||||||
|
verify_items.append((stream.embed_url or stream.url, "embed"))
|
||||||
|
|
||||||
|
verdicts = await self._playback_verifier.verify_many(verify_items)
|
||||||
|
|
||||||
|
now_iso = datetime.now(timezone.utc).isoformat()
|
||||||
|
for stream in m3u8_streams:
|
||||||
|
if not stream.is_live:
|
||||||
|
continue # already failed health check
|
||||||
|
verdict = verdicts.get(stream.url)
|
||||||
|
if verdict is None:
|
||||||
|
continue # verifier disabled or unavailable
|
||||||
|
stream.is_live = verdict.is_playable
|
||||||
|
stream.checked_at = now_iso
|
||||||
|
|
||||||
|
for stream in embed_streams:
|
||||||
|
key = stream.embed_url or stream.url
|
||||||
|
verdict = verdicts.get(key)
|
||||||
|
stream.checked_at = now_iso
|
||||||
|
if verdict is None:
|
||||||
|
# Verifier unavailable — fall back to "trust extractor".
|
||||||
|
# This keeps the service usable even without playwright.
|
||||||
|
stream.is_live = True
|
||||||
|
stream.response_time_ms = 0
|
||||||
|
else:
|
||||||
|
stream.is_live = verdict.is_playable
|
||||||
|
stream.response_time_ms = verdict.elapsed_ms
|
||||||
|
|
||||||
# Group streams by site_key and update cache
|
# Group streams by site_key and update cache
|
||||||
new_cache: dict[str, list[ExtractedStream]] = {}
|
new_cache: dict[str, list[ExtractedStream]] = {}
|
||||||
|
|
|
||||||
190
stacks/f1-stream/files/backend/extractors/timstreams.py
Normal file
190
stacks/f1-stream/files/backend/extractors/timstreams.py
Normal file
|
|
@ -0,0 +1,190 @@
|
||||||
|
"""TimStreams extractor - fetches F1 streams from the TimStreams JSON API.
|
||||||
|
|
||||||
|
Returns embed URLs from hmembeds.one for iframe playback.
|
||||||
|
The public API at stra.viaplus.site/main requires no authentication
|
||||||
|
and returns all events/channels across Events, Replays, and 24/7 categories.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from backend.extractors.base import BaseExtractor
|
||||||
|
from backend.extractors.models import ExtractedStream
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
API_URL = "https://stra.viaplus.site/main"
|
||||||
|
USER_AGENT = (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Direct F1 keyword matches (case-insensitive)
|
||||||
|
F1_KEYWORDS = {"formula 1", "formula one", "f1", "sky sports f1", "dazn f1"}
|
||||||
|
# "Grand prix" is F1-related only if non-F1 motorsport keywords are absent
|
||||||
|
GP_KEYWORD = "grand prix"
|
||||||
|
# Exclude these motorsport series when matching on "grand prix"
|
||||||
|
NON_F1_KEYWORDS = {
|
||||||
|
"motogp", "moto gp", "moto2", "moto3", "motoe",
|
||||||
|
"indycar", "indy car", "nascar",
|
||||||
|
"rally", "wrc", "wec", "lemans", "le mans",
|
||||||
|
"superbike", "dtm", "supercars",
|
||||||
|
}
|
||||||
|
|
||||||
|
# 24/7 channels that should always be included (embed hashes on hmembeds.one)
|
||||||
|
ALWAYS_INCLUDE_HASHES = {
|
||||||
|
"888520f36cd94c5da4c71fddc1a5fc9b", # Sky Sports F1
|
||||||
|
"fc3a54634d0867b0c02ee3223292e7c6", # DAZN F1
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _is_f1_event(name: str) -> bool:
    """Decide, from the name alone, whether an event/channel is Formula 1.

    The name qualifies when it contains any entry of F1_KEYWORDS, or when it
    contains GP_KEYWORD ("grand prix") and none of the NON_F1_KEYWORDS.
    All comparisons are case-insensitive substring checks.

    Note: The TimStreams API genre field (genre=2) covers ALL sports channels,
    not just motorsport, so only name-based matching is used.
    """
    lowered = name.lower()

    def mentions_any(keywords) -> bool:
        # True when at least one keyword occurs somewhere in the lowered name.
        return any(keyword in lowered for keyword in keywords)

    # A direct F1 keyword settles it immediately.
    if mentions_any(F1_KEYWORDS):
        return True

    # Otherwise only "grand prix" can qualify, and only when no competing
    # motorsport series is named alongside it.
    if GP_KEYWORD not in lowered:
        return False
    return not mentions_any(NON_F1_KEYWORDS)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_embed_hash(url: str) -> str | None:
|
||||||
|
"""Extract the hash from an hmembeds.one embed URL.
|
||||||
|
|
||||||
|
Expected format: https://hmembeds.one/embed/{hash}
|
||||||
|
Returns the hash string, or None if the URL is not in the expected format.
|
||||||
|
"""
|
||||||
|
if not url:
|
||||||
|
return None
|
||||||
|
# Handle both with and without trailing slash
|
||||||
|
url = url.rstrip("/")
|
||||||
|
prefix = "https://hmembeds.one/embed/"
|
||||||
|
alt_prefix = "http://hmembeds.one/embed/"
|
||||||
|
if url.startswith(prefix):
|
||||||
|
return url[len(prefix):] or None
|
||||||
|
if url.startswith(alt_prefix):
|
||||||
|
return url[len(alt_prefix):] or None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _is_always_include(url: str) -> bool:
    """Report whether a stream URL points at one of the always-include 24/7 channels."""
    channel_hash = _extract_embed_hash(url)
    if not channel_hash:
        # Not an hmembeds embed URL (or no hash present): never always-include.
        return False
    return channel_hash in ALWAYS_INCLUDE_HASHES
|
||||||
|
|
||||||
|
|
||||||
|
class TimStreamsExtractor(BaseExtractor):
    """Extracts embed URLs from TimStreams' public JSON API.

    The API at stra.viaplus.site/main returns a JSON array of categories,
    each containing events with stream URLs pointing to hmembeds.one embeds.
    """

    @property
    def site_key(self) -> str:
        """Stable identifier used to group this extractor's streams."""
        return "timstreams"

    @property
    def site_name(self) -> str:
        """Human-readable name of the source site."""
        return "TimStreams"

    async def extract(self) -> list[ExtractedStream]:
        """Fetch F1 events/channels and return embed URLs for iframe playback.

        An event is kept when its name looks F1-related or when any of its
        stream URLs is an always-include channel. Streams are de-duplicated
        by URL across the whole response.

        Malformed entries (non-dict categories/events/streams, non-string
        names or URLs) are skipped individually so that one bad record does
        not abort processing of the remaining categories.

        Returns:
            The extracted embed streams. Empty when the API answers with a
            non-200 status or a non-list payload; possibly partial when an
            unexpected error occurs mid-way (the error is logged).
        """
        streams: list[ExtractedStream] = []
        seen_urls: set[str] = set()

        try:
            async with httpx.AsyncClient(
                timeout=15.0,
                follow_redirects=True,
                headers={"User-Agent": USER_AGENT, "Accept": "application/json"},
            ) as client:
                resp = await client.get(API_URL)
                if resp.status_code != 200:
                    logger.warning(
                        "[timstreams] API returned HTTP %d", resp.status_code
                    )
                    return []

                data = resp.json()
                if not isinstance(data, list):
                    logger.warning("[timstreams] Unexpected API response type: %s", type(data).__name__)
                    return []

                logger.info("[timstreams] API returned %d categorie(s)", len(data))

                for category in data:
                    if not isinstance(category, dict):
                        continue
                    category_name = category.get("category", "Unknown")
                    events = category.get("events", [])
                    if not isinstance(events, list):
                        continue

                    for event in events:
                        if not isinstance(event, dict):
                            continue

                        # A null/non-string name would crash .lower() below.
                        event_name = event.get("name", "Unknown")
                        if not isinstance(event_name, str):
                            event_name = "Unknown"

                        event_streams = event.get("streams", [])
                        if not isinstance(event_streams, list):
                            continue
                        # Ignore stream entries that are not JSON objects.
                        event_streams = [s for s in event_streams if isinstance(s, dict)]
                        if not event_streams:
                            continue

                        # Check if any stream URL matches an always-include channel
                        always_include = any(
                            _is_always_include(candidate)
                            for candidate in (s.get("url", "") for s in event_streams)
                            if isinstance(candidate, str)
                        )

                        # Filter: must be F1-related or an always-include channel
                        if not always_include and not _is_f1_event(event_name):
                            continue

                        for stream_info in event_streams:
                            stream_name = stream_info.get("name", "")
                            if not isinstance(stream_name, str):
                                stream_name = ""
                            stream_url = stream_info.get("url", "")

                            if not stream_url or not isinstance(stream_url, str):
                                continue

                            # Deduplicate by URL
                            if stream_url in seen_urls:
                                continue
                            seen_urls.add(stream_url)

                            # Build a descriptive title
                            title = event_name
                            if stream_name and stream_name.lower() != event_name.lower():
                                title = f"{event_name} - {stream_name}"
                            if category_name:
                                title = f"[{category_name}] {title}"

                            streams.append(
                                ExtractedStream(
                                    url=stream_url,
                                    site_key=self.site_key,
                                    site_name=self.site_name,
                                    quality="",
                                    title=title,
                                    stream_type="embed",
                                    embed_url=stream_url,
                                )
                            )

        except httpx.TimeoutException:
            logger.warning("[timstreams] API request timed out")
        except Exception:
            logger.exception("[timstreams] Failed to fetch from API")

        logger.info("[timstreams] Extracted %d stream(s)", len(streams))
        return streams
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||||
from apscheduler.triggers.cron import CronTrigger
|
from apscheduler.triggers.cron import CronTrigger
|
||||||
|
|
@ -13,6 +14,7 @@ from fastapi.staticfiles import StaticFiles
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from starlette.responses import Response, StreamingResponse
|
from starlette.responses import Response, StreamingResponse
|
||||||
|
|
||||||
|
from backend.embed_proxy import fetch_embed, relay_asset
|
||||||
from backend.extractors import create_extraction_service
|
from backend.extractors import create_extraction_service
|
||||||
from backend.proxy import proxy_playlist, relay_stream
|
from backend.proxy import proxy_playlist, relay_stream
|
||||||
from backend.schedule import ScheduleService
|
from backend.schedule import ScheduleService
|
||||||
|
|
@ -117,10 +119,6 @@ async def lifespan(app: FastAPI):
|
||||||
# Startup: load schedule and start background scheduler
|
# Startup: load schedule and start background scheduler
|
||||||
await schedule_service.initialize()
|
await schedule_service.initialize()
|
||||||
|
|
||||||
# Run initial extraction
|
|
||||||
logger.info("Running initial stream extraction...")
|
|
||||||
await extraction_service.run_extraction()
|
|
||||||
|
|
||||||
# Schedule daily schedule refresh
|
# Schedule daily schedule refresh
|
||||||
scheduler.add_job(
|
scheduler.add_job(
|
||||||
_scheduled_refresh,
|
_scheduled_refresh,
|
||||||
|
|
@ -130,13 +128,18 @@ async def lifespan(app: FastAPI):
|
||||||
replace_existing=True,
|
replace_existing=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Schedule periodic stream extraction (default: every 30 minutes)
|
# Schedule periodic stream extraction (default: every 30 minutes).
|
||||||
|
# next_run_time fires the first run 8s after startup. We don't run
|
||||||
|
# extraction inline here because it calls the playback verifier,
|
||||||
|
# which hits http://127.0.0.1:8000/embed for embed streams — uvicorn
|
||||||
|
# isn't listening yet inside the lifespan startup phase.
|
||||||
scheduler.add_job(
|
scheduler.add_job(
|
||||||
_scheduled_extraction,
|
_scheduled_extraction,
|
||||||
trigger=IntervalTrigger(minutes=30),
|
trigger=IntervalTrigger(minutes=30),
|
||||||
id="stream_extraction",
|
id="stream_extraction",
|
||||||
name="Extract streams from all registered sites",
|
name="Extract streams from all registered sites",
|
||||||
replace_existing=True,
|
replace_existing=True,
|
||||||
|
next_run_time=datetime.now(timezone.utc) + timedelta(seconds=8),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Schedule token refresh every 4 minutes (safe margin for 5-min CDN tokens).
|
# Schedule token refresh every 4 minutes (safe margin for 5-min CDN tokens).
|
||||||
|
|
@ -159,6 +162,10 @@ async def lifespan(app: FastAPI):
|
||||||
# Shutdown
|
# Shutdown
|
||||||
scheduler.shutdown(wait=False)
|
scheduler.shutdown(wait=False)
|
||||||
logger.info("APScheduler shut down")
|
logger.info("APScheduler shut down")
|
||||||
|
try:
|
||||||
|
await extraction_service.shutdown()
|
||||||
|
except Exception:
|
||||||
|
logger.exception("extraction_service shutdown failed")
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI(title="F1 Streams", lifespan=lifespan)
|
app = FastAPI(title="F1 Streams", lifespan=lifespan)
|
||||||
|
|
@ -409,6 +416,37 @@ async def relay_endpoint(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Embed iframe-stripping proxy ---
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/embed")
async def embed_proxy(url: str = Query(..., description="Base64url-encoded embed URL")):
    """Serve a third-party embed page from our origin so it can be iframed.

    The upstream fetch is delegated to fetch_embed, which is described as
    stripping X-Frame-Options and CSP frame-ancestors, injecting a base href
    plus a frame-buster-defeat script, and forwarding a plausible
    Referer/Origin to get past upstream allowlists.
    """
    fetched = await fetch_embed(url)
    page_body, response_headers, upstream_status = fetched
    return Response(
        content=page_body,
        headers=response_headers,
        status_code=upstream_status,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/embed-asset")
async def embed_asset(
    request: Request,
    url: str = Query(..., description="Base64url-encoded subresource URL"),
):
    """Stream an upstream subresource (JS/CSS/image/etc.) for the embed proxy.

    This is the fallback path for upstreams that block hotlinked assets via
    Origin or Referer checks; most assets load directly through the injected
    <base> tag and never reach this endpoint. The client's Range header, if
    any, is passed through to relay_asset.
    """
    relayed = await relay_asset(url, request.headers.get("range"))
    body_iter, response_headers, upstream_status = relayed
    return StreamingResponse(
        body_iter,
        headers=response_headers,
        status_code=upstream_status,
    )
|
||||||
|
|
||||||
|
|
||||||
# --- Frontend Static Files ---
|
# --- Frontend Static Files ---
|
||||||
# Mount the SvelteKit static build AFTER all API routes so API endpoints take priority.
|
# Mount the SvelteKit static build AFTER all API routes so API endpoints take priority.
|
||||||
# SvelteKit adapter-static with ssr=false produces {page}.html files and a fallback index.html.
|
# SvelteKit adapter-static with ssr=false produces {page}.html files and a fallback index.html.
|
||||||
|
|
|
||||||
445
stacks/f1-stream/files/backend/playback_verifier.py
Normal file
445
stacks/f1-stream/files/backend/playback_verifier.py
Normal file
|
|
@ -0,0 +1,445 @@
|
||||||
|
"""Headless-browser playback verification for extracted streams.
|
||||||
|
|
||||||
|
The basic health checker (backend/health.py) only validates m3u8 syntax.
|
||||||
|
For embed/iframe streams it has nothing to check — the previous code blindly
|
||||||
|
marked every embed `is_live=True`, which meant the stream list was full of
|
||||||
|
news articles and aggregator landing pages that never actually played.
|
||||||
|
|
||||||
|
This module loads each candidate stream URL in headless Chromium (via
|
||||||
|
Playwright) and looks for *codec-independent* signals that the upstream
|
||||||
|
serves a playable stream:
|
||||||
|
|
||||||
|
- For m3u8: hls.js reports MANIFEST_PARSED, FRAG_LOADED, or BUFFER_APPENDED
  (any one of them is sufficient), or fails only with a
  manifest-incompatible-codecs error. We don't wait for `<video>` to gain
  dimensions, because Playwright's chromium build doesn't include the
  H.264/AAC codecs. The user's real browser does, so confirming that the
  manifest (and, when observed, a segment) loads is the right server-side
  signal.
- For embed: the verifier navigates directly to the proxied embed page (the
  embed proxy strips X-Frame-Options + frame-buster JS) and looks for a
  `<video>` element or a player container div in that page's DOM.
|
||||||
|
|
||||||
|
Designed to be called from the extraction service's run_extraction()
|
||||||
|
hook, with bounded concurrency. Each verification typically takes
|
||||||
|
4-12 seconds.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Toggle off in development by setting PLAYBACK_VERIFY_ENABLED=false.
|
||||||
|
VERIFY_ENABLED = os.getenv("PLAYBACK_VERIFY_ENABLED", "true").lower() in ("true", "1", "yes")
|
||||||
|
|
||||||
|
# Maximum number of concurrent browser pages. Clamped to at least 1: this
# value sizes an asyncio.Semaphore, where 0 would block every verification
# forever and a negative value makes the Semaphore constructor raise.
MAX_CONCURRENCY = max(1, int(os.getenv("PLAYBACK_VERIFY_CONCURRENCY", "2")))
|
||||||
|
|
||||||
|
# Per-stream verification budget (seconds). Beyond this we declare unplayable.
|
||||||
|
PER_STREAM_TIMEOUT = float(os.getenv("PLAYBACK_VERIFY_TIMEOUT", "20"))
|
||||||
|
|
||||||
|
# Where the embed proxy lives, used to wrap embed URLs so they bypass
|
||||||
|
# X-Frame-Options/CSP/JS frame-busters during verification. Defaults to
|
||||||
|
# loopback because verification runs inside the same FastAPI process.
|
||||||
|
PROXY_BASE = os.getenv("PLAYBACK_VERIFY_PROXY_BASE", "http://127.0.0.1:8000")
|
||||||
|
|
||||||
|
USER_AGENT = (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PlaybackVerdict:
    """Result of one playback-verification attempt for a single stream URL."""

    # True when the verifier observed a signal it treats as "playable".
    is_playable: bool
    signal: str = ""  # which check triggered the positive verdict
    # Milliseconds between the start of the verification and the verdict.
    elapsed_ms: int = 0
    # Reason for a negative verdict (e.g. "goto failed: ..."); empty otherwise.
    error: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def _b64url(s: str) -> str:
|
||||||
|
"""URL-safe base64 with padding stripped — matches m3u8_rewriter.encode_url."""
|
||||||
|
return base64.urlsafe_b64encode(s.encode()).decode().rstrip("=")
|
||||||
|
|
||||||
|
|
||||||
|
def _hls_test_html(m3u8_url: str) -> str:
|
||||||
|
"""A self-contained HTML page that loads an m3u8 via hls.js into a <video>.
|
||||||
|
|
||||||
|
The page exposes window._verifier with manifest_parsed / frag_loaded
|
||||||
|
booleans the verifier polls. It also marks media-error or fatal-error
|
||||||
|
so we can distinguish 'upstream is unreachable' from 'codec missing'.
|
||||||
|
"""
|
||||||
|
return f"""<!doctype html>
|
||||||
|
<html><head><meta charset="utf-8"><title>verify</title>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/hls.js@1.5/dist/hls.min.js"></script>
|
||||||
|
</head><body>
|
||||||
|
<video id="v" muted playsinline width="640" height="360"></video>
|
||||||
|
<script>
|
||||||
|
window._verifier = {{
|
||||||
|
manifest_parsed: false,
|
||||||
|
frag_loaded: false,
|
||||||
|
media_loaded: false, // true when MSE has appended any buffer
|
||||||
|
fatal_network_error: false, // upstream truly unreachable
|
||||||
|
manifest_incompatible: false, // codec missing — separate from network reachability
|
||||||
|
hls_error_details: ""
|
||||||
|
}};
|
||||||
|
const v = document.getElementById('v');
|
||||||
|
const url = {m3u8_url!r};
|
||||||
|
function start() {{
|
||||||
|
if (window.Hls && Hls.isSupported()) {{
|
||||||
|
const hls = new Hls({{enableWorker: true}});
|
||||||
|
hls.on(Hls.Events.MANIFEST_PARSED, () => {{ window._verifier.manifest_parsed = true; }});
|
||||||
|
hls.on(Hls.Events.FRAG_LOADED, () => {{ window._verifier.frag_loaded = true; }});
|
||||||
|
hls.on(Hls.Events.BUFFER_APPENDED, () => {{ window._verifier.media_loaded = true; }});
|
||||||
|
hls.on(Hls.Events.ERROR, (_, d) => {{
|
||||||
|
window._verifier.hls_error_details = d.details || "";
|
||||||
|
if (d.fatal && d.type === Hls.ErrorTypes.NETWORK_ERROR) {{
|
||||||
|
window._verifier.fatal_network_error = true;
|
||||||
|
}}
|
||||||
|
if (d.details === Hls.ErrorDetails.MANIFEST_INCOMPATIBLE_CODECS_ERROR) {{
|
||||||
|
window._verifier.manifest_incompatible = true;
|
||||||
|
}}
|
||||||
|
}});
|
||||||
|
hls.loadSource(url);
|
||||||
|
hls.attachMedia(v);
|
||||||
|
}} else if (v.canPlayType('application/vnd.apple.mpegurl')) {{
|
||||||
|
v.src = url;
|
||||||
|
v.addEventListener('loadedmetadata', () => {{ window._verifier.manifest_parsed = true; window._verifier.frag_loaded = true; }});
|
||||||
|
v.addEventListener('error', () => {{ window._verifier.fatal_network_error = true; }});
|
||||||
|
}} else {{
|
||||||
|
window._verifier.hls_error_details = "no hls support";
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
window.addEventListener('load', start);
|
||||||
|
</script></body></html>"""
|
||||||
|
|
||||||
|
|
||||||
|
def _embed_test_html(_proxied_embed_url: str) -> str:
    """No longer used — verifier navigates the page directly to the proxy URL.

    The earlier iframe-wrapper approach hit same-origin policy when inspecting
    the iframe's contentDocument (the wrapper page was a data: URL, the iframe
    was http://127.0.0.1:8000), so we couldn't read the embed's DOM.

    Args:
        _proxied_embed_url: Ignored.

    Returns:
        Always the empty string.
    """
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
_M3U8_POLL_JS = """
|
||||||
|
() => {
|
||||||
|
const v = window._verifier || {};
|
||||||
|
const vid = document.querySelector('video');
|
||||||
|
return {
|
||||||
|
manifest_parsed: !!v.manifest_parsed,
|
||||||
|
frag_loaded: !!v.frag_loaded,
|
||||||
|
media_loaded: !!v.media_loaded,
|
||||||
|
fatal_network_error: !!v.fatal_network_error,
|
||||||
|
manifest_incompatible: !!v.manifest_incompatible,
|
||||||
|
hls_error_details: v.hls_error_details || "",
|
||||||
|
video_width: vid ? vid.videoWidth : 0,
|
||||||
|
video_ready: vid ? vid.readyState : 0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
_EMBED_POLL_JS = """
|
||||||
|
() => {
|
||||||
|
try {
|
||||||
|
const vids = document.querySelectorAll('video');
|
||||||
|
if (vids.length > 0) {
|
||||||
|
const v = vids[0];
|
||||||
|
return {
|
||||||
|
has_video: true,
|
||||||
|
src: v.currentSrc || v.src || "",
|
||||||
|
width: v.videoWidth,
|
||||||
|
ready: v.readyState,
|
||||||
|
duration: isFinite(v.duration) ? v.duration : 0,
|
||||||
|
media_keys: !!v.mediaKeys,
|
||||||
|
sources: v.querySelectorAll('source').length,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
const player_divs = document.querySelectorAll(
|
||||||
|
'[id*="player" i], [class*="player" i], [class*="jwplayer" i], [id*="video" i], [class*="video-js" i]'
|
||||||
|
);
|
||||||
|
return {has_video: false, has_player_div: player_divs.length > 0};
|
||||||
|
} catch (e) {
|
||||||
|
return {has_video: false, has_player_div: false, err: String(e)};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def _verify_m3u8(page, m3u8_url: str, deadline: float) -> PlaybackVerdict:
    """Load an m3u8 through hls.js in the given page and judge playability.

    The generated test page is opened as a base64 data: URL and its
    window._verifier state is polled every 250 ms until `deadline`
    (a time.monotonic() value).

    Positive signals, checked in this order on each poll:
      1. media_loaded (MSE buffer appended).
      2. frag_loaded (hls.js fetched at least one segment).
      3. manifest_incompatible — the manifest was fetched and parsed; only
         the verifier's browser lacks the codecs.
      4. manifest_parsed.
    Negative outcomes:
      - page navigation or evaluation raises.
      - fatal_network_error is reported by the page.
      - the deadline passes with none of the above.
    """
    began = time.monotonic()

    def verdict(playable: bool, *, signal: str = "", error: str = "") -> PlaybackVerdict:
        # Stamp every verdict with the time elapsed since verification began.
        return PlaybackVerdict(
            is_playable=playable,
            signal=signal,
            error=error,
            elapsed_ms=int((time.monotonic() - began) * 1000),
        )

    page_source = _hls_test_html(m3u8_url).encode()
    data_url = "data:text/html;base64," + base64.b64encode(page_source).decode()

    try:
        await page.goto(data_url, wait_until="domcontentloaded", timeout=10_000)
    except Exception as exc:
        return verdict(False, error=f"goto failed: {exc}")

    # (state flag, reported signal) pairs, strongest first.
    positive_signals = (
        ("media_loaded", "media_loaded"),
        ("frag_loaded", "frag_loaded"),
        ("manifest_incompatible", "manifest_parsed_codec_missing_in_verifier"),
        ("manifest_parsed", "manifest_parsed"),
    )

    latest: dict = {}
    while time.monotonic() < deadline:
        try:
            latest = await page.evaluate(_M3U8_POLL_JS)
        except Exception as exc:
            return verdict(False, error=f"evaluate failed: {exc}")

        for flag, label in positive_signals:
            if latest.get(flag):
                return verdict(True, signal=label)

        if latest.get("fatal_network_error"):
            return verdict(False, error="upstream network error")

        await asyncio.sleep(0.25)

    # Deadline reached: surface the last hls.js error detail when there is one.
    details = latest.get("hls_error_details")
    message = f"hls.js error: {details}" if details else "no playback signal"
    return verdict(False, error=message)
|
||||||
|
|
||||||
|
|
||||||
|
async def _verify_embed(page, proxied_url: str, deadline: float) -> PlaybackVerdict:
    """Navigate directly to the proxied embed and confirm a player rendered.

    Positive signals (in priority order):
      - <video> with src/sources/mediaKeys set or a non-zero width
        ("video.wired") — returned as soon as it is seen.
      - <video> element exists with any state ("video.tag_only").
      - A player container div (jwplayer, video-js, [id*=player], etc.)
        ("player_div").

    Loading the embed page directly (not via iframe wrapper) avoids the
    same-origin policy that prevented earlier iframe-introspection runs
    from seeing the embed DOM.

    Args:
        page: Browser page used for navigation and evaluation.
        proxied_url: URL of the embed as served by the embed proxy.
        deadline: time.monotonic() value after which polling stops.

    Returns:
        A PlaybackVerdict. If page evaluation fails after a weaker positive
        signal was already observed, that signal still wins; the
        "evaluate failed" error is only reported when nothing positive was
        seen beforehand.
    """
    start = time.monotonic()

    def elapsed_ms() -> int:
        return int((time.monotonic() - start) * 1000)

    try:
        await page.goto(proxied_url, wait_until="domcontentloaded", timeout=15_000)
    except Exception as e:
        return PlaybackVerdict(
            is_playable=False, error=f"goto failed: {e}",
            elapsed_ms=elapsed_ms(),
        )

    # Track the best state seen across all polls. Some embeds load a player
    # div briefly then anti-bot JS tears the DOM down (hmembeds redirects
    # to google.com if its devtool-detection trips). We accept any positive
    # signal observed during the window, even if it's gone by timeout.
    seen_video_tag = False
    seen_player_div = False
    last_err = ""
    evaluate_error = ""

    while time.monotonic() < deadline:
        try:
            r = await page.evaluate(_EMBED_POLL_JS)
        except Exception as e:
            # A teardown/redirect typically surfaces here. Stop polling but
            # keep whatever positive signal was already observed instead of
            # discarding it.
            evaluate_error = f"evaluate failed: {e}"
            break
        if r.get("has_video"):
            seen_video_tag = True
            if r.get("src") or r.get("width", 0) > 0 or r.get("media_keys") or r.get("sources", 0) > 0:
                return PlaybackVerdict(
                    is_playable=True, signal="video.wired",
                    elapsed_ms=elapsed_ms(),
                )
        if r.get("has_player_div"):
            seen_player_div = True
        last_err = r.get("err", "")
        await asyncio.sleep(0.5)

    if seen_video_tag:
        return PlaybackVerdict(is_playable=True, signal="video.tag_only",
                               elapsed_ms=elapsed_ms())
    if seen_player_div:
        return PlaybackVerdict(is_playable=True, signal="player_div",
                               elapsed_ms=elapsed_ms())

    if evaluate_error:
        return PlaybackVerdict(is_playable=False, error=evaluate_error,
                               elapsed_ms=elapsed_ms())

    err = "no <video> or player container found"
    if last_err:
        err += f"; last_err: {last_err}"
    return PlaybackVerdict(is_playable=False, error=err,
                           elapsed_ms=elapsed_ms())
|
||||||
|
|
||||||
|
|
||||||
|
class PlaybackVerifier:
    """Verifies playability of m3u8 and embed URLs via headless Chromium.

    Keeps one lazily-launched browser on the instance and opens a fresh
    browser context per verification. The number of verifications running
    at once is bounded by a semaphore sized by ``MAX_CONCURRENCY``.
    """

    def __init__(self) -> None:
        # Both handles stay None until the first verify() call launches them.
        self._browser = None
        self._playwright = None
        # Caps how many pages are being verified at the same time.
        self._sem = asyncio.Semaphore(MAX_CONCURRENCY)
        # Serialises browser start-up so concurrent callers launch it once.
        self._lock = asyncio.Lock()

    @staticmethod
    async def _close_quietly(resource, label: str) -> None:
        """Close a Playwright page/context, logging (not raising) on failure."""
        try:
            await resource.close()
        except Exception:
            logger.debug("error closing %s", label, exc_info=True)

    async def _ensure_browser(self):
        """Return the shared browser, launching it on first use.

        Returns:
            The browser object, or ``None`` when Playwright cannot be
            imported or the browser fails to launch.
        """
        if self._browser is not None:
            return self._browser
        async with self._lock:
            # Another task may have finished the launch while we waited.
            if self._browser is not None:
                return self._browser
            try:
                from playwright.async_api import async_playwright
            except ImportError:
                logger.error("playwright not installed — playback verification disabled")
                return None

            playwright = None
            try:
                playwright = await async_playwright().start()
                browser = await playwright.chromium.launch(
                    headless=True,
                    args=[
                        "--disable-dev-shm-usage",
                        "--disable-web-security",
                        "--no-sandbox",
                        "--disable-setuid-sandbox",
                        "--disable-features=IsolateOrigins,site-per-process",
                        "--autoplay-policy=no-user-gesture-required",
                    ],
                )
            except Exception:
                # Do not leave a started driver behind when the launch fails;
                # the next verify() call will simply try again from scratch.
                logger.exception("failed to launch Playwright browser")
                if playwright is not None:
                    try:
                        await playwright.stop()
                    except Exception:
                        logger.exception("error stopping playwright after failed launch")
                return None

            self._playwright = playwright
            self._browser = browser
            logger.info("Playwright browser launched (concurrency=%d)", MAX_CONCURRENCY)
            return self._browser

    async def shutdown(self) -> None:
        """Close the browser and stop Playwright; errors are logged, not raised."""
        if self._browser is not None:
            try:
                await self._browser.close()
            except Exception:
                logger.exception("error closing browser")
        if self._playwright is not None:
            try:
                await self._playwright.stop()
            except Exception:
                logger.exception("error stopping playwright")
        self._browser = None
        self._playwright = None

    async def verify(self, url: str, stream_type: str) -> PlaybackVerdict:
        """Check a single stream URL for playability.

        Args:
            url: Stream URL. For any ``stream_type`` other than ``"m3u8"`` it
                is rewritten to go through the ``/embed`` proxy at
                ``PROXY_BASE`` before being loaded.
            stream_type: ``"m3u8"`` selects the m3u8 check; anything else
                selects the embed check.

        Returns:
            A ``PlaybackVerdict``. Failures are reported through the verdict
            (``is_playable=False`` plus ``error``) rather than raised.
        """
        if not VERIFY_ENABLED:
            return PlaybackVerdict(is_playable=True, error="disabled")

        browser = await self._ensure_browser()
        if browser is None:
            return PlaybackVerdict(is_playable=False, error="playwright unavailable")

        is_m3u8 = stream_type == "m3u8"
        if not is_m3u8:
            url = f"{PROXY_BASE}/embed?url={_b64url(url)}"

        async with self._sem:
            # Set the per-stream deadline AFTER acquiring the semaphore.
            # Otherwise queued streams that wait behind earlier ones
            # would have already-expired deadlines when they start.
            start = time.monotonic()
            deadline = start + PER_STREAM_TIMEOUT

            context = None
            try:
                context = await browser.new_context(
                    user_agent=USER_AGENT,
                    viewport={"width": 1280, "height": 720},
                    bypass_csp=True,
                )
                page = await context.new_page()
            except Exception as e:
                # new_context() may have succeeded before new_page() failed;
                # release the context so it does not accumulate in the browser.
                if context is not None:
                    await self._close_quietly(context, "context")
                return PlaybackVerdict(
                    is_playable=False, error=f"context create failed: {e}",
                )

            try:
                if is_m3u8:
                    verdict = await _verify_m3u8(page, url, deadline)
                else:
                    verdict = await _verify_embed(page, url, deadline)
            except asyncio.TimeoutError:
                verdict = PlaybackVerdict(
                    is_playable=False, error="overall timeout",
                    elapsed_ms=int((time.monotonic() - start) * 1000),
                )
            except Exception as e:
                verdict = PlaybackVerdict(
                    is_playable=False, error=f"verify exception: {e}",
                    elapsed_ms=int((time.monotonic() - start) * 1000),
                )
            finally:
                # Close each resource independently so a failing page.close()
                # cannot prevent the context from being released.
                await self._close_quietly(page, "page")
                await self._close_quietly(context, "context")

            logger.info(
                "[verify] %s -> playable=%s signal=%s err=%s elapsed=%dms",
                url[:120], verdict.is_playable, verdict.signal,
                verdict.error, verdict.elapsed_ms,
            )
            return verdict

    async def verify_many(self, items: list[tuple[str, str]]) -> dict[str, PlaybackVerdict]:
        """Verify several streams concurrently.

        Args:
            items: ``(url, stream_type)`` pairs.

        Returns:
            A dict mapping every input URL to its verdict. A task that
            crashes is recorded as not playable instead of being dropped.
            If the same URL appears more than once, the last result wins.
        """
        if not items:
            return {}
        if not VERIFY_ENABLED:
            return {url: PlaybackVerdict(is_playable=True, error="disabled") for url, _ in items}

        # gather() returns results in input order, so they pair up with items.
        results = await asyncio.gather(
            *[self.verify(url, st) for url, st in items], return_exceptions=True
        )
        out: dict[str, PlaybackVerdict] = {}
        for (url, _), result in zip(items, results):
            if isinstance(result, BaseException):
                # We are outside an except block here, so pass the exception
                # explicitly to get its traceback into the log.
                logger.error(
                    "verify task crashed for %s: %r", url[:120], result,
                    exc_info=result,
                )
                out[url] = PlaybackVerdict(
                    is_playable=False, error=f"verify task crashed: {result!r}",
                )
                continue
            out[url] = result
        return out
|
||||||
|
|
@ -3,3 +3,4 @@ uvicorn[standard]
|
||||||
httpx>=0.27.0
|
httpx>=0.27.0
|
||||||
apscheduler>=3.10.0,<4.0
|
apscheduler>=3.10.0,<4.0
|
||||||
pydantic>=2.0.0
|
pydantic>=2.0.0
|
||||||
|
playwright==1.48.0
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,20 @@ export function getProxyUrl(m3u8Url) {
|
||||||
return `${API_BASE}/proxy?url=${encoded}`;
|
return `${API_BASE}/proxy?url=${encoded}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the URL that loads an upstream iframe embed page through our
 * `/embed` proxy.
 *
 * The proxy removes X-Frame-Options / CSP frame-ancestors and injects a
 * frame-buster-defeat script, so the embed can render inside our iframe
 * even when the upstream tries to block framing.
 * @param {string} embedUrl - The original embed page URL
 * @returns {string} URL pointing at our /embed proxy
 */
export function getEmbedProxyUrl(embedUrl) {
  return `${API_BASE}/embed?url=${toBase64Url(embedUrl)}`;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mark a stream as actively being watched (enables token refresh).
|
* Mark a stream as actively being watched (enables token refresh).
|
||||||
* @param {string} url - The stream URL
|
* @param {string} url - The stream URL
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
<script>
|
<script>
|
||||||
import { fetchStreams, fetchSchedule, getProxyUrl, activateStream, deactivateStream } from '$lib/api.js';
|
import { fetchStreams, fetchSchedule, getProxyUrl, getEmbedProxyUrl, activateStream, deactivateStream } from '$lib/api.js';
|
||||||
import { onMount, onDestroy } from 'svelte';
|
import { onMount, onDestroy } from 'svelte';
|
||||||
import { page } from '$app/state';
|
import { page } from '$app/state';
|
||||||
|
|
||||||
|
|
@ -107,12 +107,14 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stream.stream_type === 'embed') {
|
if (stream.stream_type === 'embed') {
|
||||||
// Embed/iframe player — no hls.js needed
|
// Embed/iframe player — route through our /embed proxy so the
|
||||||
|
// upstream's X-Frame-Options / CSP / JS frame-busters can't
|
||||||
|
// block the iframe.
|
||||||
const newPlayer = {
|
const newPlayer = {
|
||||||
id: Date.now(),
|
id: Date.now(),
|
||||||
proxyUrl: '',
|
proxyUrl: '',
|
||||||
originalUrl: stream.embed_url,
|
originalUrl: stream.embed_url,
|
||||||
embedUrl: stream.embed_url,
|
embedUrl: getEmbedProxyUrl(stream.embed_url),
|
||||||
streamType: 'embed',
|
streamType: 'embed',
|
||||||
siteKey: stream.site_key || '',
|
siteKey: stream.site_key || '',
|
||||||
siteName: stream.site_name || stream.site_key || 'Unknown',
|
siteName: stream.site_name || stream.site_key || 'Unknown',
|
||||||
|
|
|
||||||
|
|
@ -104,11 +104,11 @@ resource "kubernetes_deployment" "f1-stream" {
|
||||||
name = "f1-stream"
|
name = "f1-stream"
|
||||||
resources {
|
resources {
|
||||||
limits = {
|
limits = {
|
||||||
memory = "256Mi"
|
memory = "1Gi"
|
||||||
}
|
}
|
||||||
requests = {
|
requests = {
|
||||||
cpu = "25m"
|
cpu = "100m"
|
||||||
memory = "256Mi"
|
memory = "1Gi"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
port {
|
port {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue