2026-03-01 14:35:19 +00:00
|
|
|
"""Streamed.pk extractor - fetches F1/motorsport streams via public JSON API."""
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
import httpx
|
|
|
|
|
|
|
|
|
|
from backend.extractors.base import BaseExtractor
|
|
|
|
|
from backend.extractors.models import ExtractedStream
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
f1-stream: add chrome-browser, subreddit, dd12 extractors; fix streamed.pk
User asked to broaden the source pipeline so f1-stream can find F1 (and
adjacent motorsport) streams from Sky Sports / DAZN / Reddit / etc.,
using the in-cluster chrome-service headed browser where needed. Four
changes:
1. **streamed.py**: BASE_URL streamed.su → streamed.pk. The .su domain
stopped serving the API host in 2026 (only the marketing page is
left); .pk hosts the JSON API now. Adds 3 events/round (currently
all routed through embedsports.top — see #2 caveat).
2. **chrome_browser.py** (new): generic chrome-service-driven extractor.
Connects to the existing chrome-service WS (CHROME_WS_URL +
CHROME_WS_TOKEN env), navigates a list of TARGETS, captures any HLS
playlist URL the page fetches at runtime, returns one ExtractedStream
per discovery. Uses the same stealth init script as the verifier so
anti-bot checks don't trip the page. Handles iframes (DD12-style
/nas → /new-nas/jwplayer) and probes child-frame <video>/source
elements after settle. Caveat: most aggregator sites (pooembed,
embedsports, hmembeds, even DD12's JW Player path) use a broken
runtime decoder that produces no m3u8 in our environment, so the
TARGETS list is currently 0-yielding; the framework is the
contribution and concrete sites can be added as they're discovered.
3. **subreddit.py** (new): scans r/MotorsportsReplays, r/motorsports,
r/formula1, r/motogp via the public old.reddit.com JSON API for
posts whose flair/title indicates a live stream. Discovered URLs
are returned as embed-type streams; the verifier visits each via
chrome-service to confirm playability. Note: Reddit currently returns
HTTP 403 to our cluster's outbound IP for anonymous JSON requests; the
extractor returns 0 streams in that state and logs a debug message. It
will work from any IP Reddit isn't blocking.
4. **dd12.py** (new): inline-HTML scraper for DD12Streams. The site
embeds `playerInstance.setup({file: "..."})` directly in HTML — no
JS decoder needed. Currently surfaces NASCAR Cup Series 24/7 (clean
BunnyCDN-hosted HLS at w9329432hnf3h34.b-cdn.net/pdfs/master.m3u8);
add new `(path, label, title)` tuples to CHANNELS as DD12 expands.
Result: /streams now shows 2 verified live streams (Rally TV via
pitsport + DD12 NASCAR Cup 24/7). When the next F1 weekend (Canadian
GP, May 22-24) goes live, pitsport will surface F1 sessions
automatically via the existing pushembdz path.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-07 16:05:25 +00:00
|
|
|
# The service moved from streamed.su to streamed.pk in 2026: the .su domain
# no longer resolves the API host (only the marketing page remains there).
BASE_URL = "https://streamed.pk"
|
2026-03-01 14:35:19 +00:00
|
|
|
# Desktop Chrome User-Agent string used as the client's User-Agent header.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class StreamedExtractor(BaseExtractor):
    """Extracts streams from Streamed.pk's public JSON API.

    Uses two endpoints:
    - GET /api/matches/motor-sports → list of events with sources
    - GET /api/stream/{source}/{id} → embed URL for a specific source

    Malformed records (events, sources or stream items that are not JSON
    objects) are skipped individually so that a single bad entry cannot
    abort extraction of the remaining events.
    """

    @property
    def site_key(self) -> str:
        """Return the short key stamped on every stream from this extractor."""
        return "streamed"

    @property
    def site_name(self) -> str:
        """Return the display name stamped on every stream from this extractor."""
        return "Streamed"

    async def extract(self) -> list[ExtractedStream]:
        """Fetch motorsport events and resolve embed URLs for each source.

        Returns:
            One ``ExtractedStream`` per embed URL discovered. An empty list
            is returned when the events endpoint answers with a non-200
            status or a non-list payload. Errors are logged rather than
            raised; streams collected before an error are still returned.
        """
        streams: list[ExtractedStream] = []

        try:
            async with httpx.AsyncClient(
                timeout=15.0,
                follow_redirects=True,
                headers={"User-Agent": USER_AGENT, "Accept": "application/json"},
            ) as client:
                # Get motorsport events
                resp = await client.get(f"{BASE_URL}/api/matches/motor-sports")
                if resp.status_code != 200:
                    logger.warning(
                        "[streamed] Events API returned HTTP %d", resp.status_code
                    )
                    return []

                events = resp.json()
                if not isinstance(events, list):
                    logger.warning("[streamed] Unexpected events response type")
                    return []

                logger.info("[streamed] Found %d motorsport event(s)", len(events))

                for event in events:
                    # Skip malformed entries instead of letting .get() raise
                    # and abort every remaining event.
                    if not isinstance(event, dict):
                        continue

                    # `or` also covers an explicit null/empty title, which
                    # would otherwise be rendered as the text "None" or "".
                    title = event.get("title") or "Unknown Event"
                    sources = event.get("sources")
                    if not isinstance(sources, list):
                        continue

                    for source_info in sources:
                        if not isinstance(source_info, dict):
                            continue

                        source_name = source_info.get("source", "")
                        source_id = source_info.get("id", "")
                        if not source_name or not source_id:
                            continue

                        streams.extend(
                            await self._fetch_source_streams(
                                client, title, source_name, source_id
                            )
                        )

        except Exception:
            logger.exception("[streamed] Failed to fetch events")

        logger.info("[streamed] Extracted %d stream(s)", len(streams))
        return streams

    async def _fetch_source_streams(
        self,
        client: "httpx.AsyncClient",
        title: str,
        source_name: str,
        source_id: str,
    ) -> list[ExtractedStream]:
        """Resolve one event source into its embed streams (best effort).

        Args:
            client: Open HTTP client used for the request.
            title: Title of the event the source belongs to.
            source_name: Value of the source's ``source`` field.
            source_id: Value of the source's ``id`` field.

        Returns:
            Streams built from the endpoint's payload. Any failure is logged
            at debug level and the streams built so far are returned.
        """
        found: list[ExtractedStream] = []

        try:
            stream_resp = await client.get(
                f"{BASE_URL}/api/stream/{source_name}/{source_id}"
            )
            if stream_resp.status_code != 200:
                logger.debug(
                    "[streamed] Stream API returned HTTP %d for %s/%s",
                    stream_resp.status_code,
                    source_name,
                    source_id,
                )
                return found

            stream_data = stream_resp.json()
            # The endpoint may answer with a single object instead of a list.
            if not isinstance(stream_data, list):
                stream_data = [stream_data]

            for item in stream_data:
                if not isinstance(item, dict):
                    continue
                stream = self._build_stream(title, item)
                if stream is not None:
                    found.append(stream)
        except Exception:
            logger.debug(
                "[streamed] Failed to fetch stream for %s/%s",
                source_name,
                source_id,
                exc_info=True,
            )

        return found

    def _build_stream(self, title: str, item: dict) -> "ExtractedStream | None":
        """Convert one stream item into an ``ExtractedStream``.

        Args:
            title: Title of the parent event, used as the base stream title.
            item: One JSON object from the stream endpoint's payload.

        Returns:
            The stream, or ``None`` when the item carries no ``embedUrl``.
        """
        embed_url = item.get("embedUrl", "")
        if not embed_url:
            return None

        quality = "HD" if item.get("hd", False) else "SD"

        stream_title = f"{title}"
        language = item.get("language", "")
        if language:
            stream_title += f" ({language})"

        stream_no = item.get("streamNo", 1)
        # Tolerate a numeric string; any other non-number (e.g. null) simply
        # means "no suffix" rather than a TypeError on the comparison below.
        if isinstance(stream_no, str) and stream_no.isdecimal():
            stream_no = int(stream_no)
        if isinstance(stream_no, (int, float)) and stream_no > 1:
            stream_title += f" #{stream_no}"

        return ExtractedStream(
            url=embed_url,
            site_key=self.site_key,
            site_name=self.site_name,
            quality=quality,
            title=stream_title,
            stream_type="embed",
            embed_url=embed_url,
        )
|