infra/stacks/f1-stream/files/backend/extractors/streamed.py

"""Streamed.pk extractor - fetches F1/motorsport streams via public JSON API."""

import logging

import httpx

from backend.extractors.base import BaseExtractor
from backend.extractors.models import ExtractedStream

# Module-scoped logger used for every message emitted by this extractor.
logger = logging.getLogger(__name__)

# The service moved from streamed.su to streamed.pk in 2026: the old .su
# domain no longer resolves the API host and only serves a marketing page.
BASE_URL = "https://streamed.pk"

# Desktop-Chrome browser identity string, assembled from its three
# space-separated product tokens.
USER_AGENT = " ".join(
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/120.0.0.0 Safari/537.36",
    )
)


class StreamedExtractor(BaseExtractor):
    """Extracts streams from Streamed.pk's public JSON API.

    Uses two endpoints:
    - GET /api/matches/motor-sports → list of events with sources
    - GET /api/stream/{source}/{id} → embed URL for a specific source

    Extraction is best-effort: malformed events, sources, or stream entries
    are skipped individually so that one bad record does not discard the
    rest of the results.
    """

    @property
    def site_key(self) -> str:
        """Return the short identifier stamped on every extracted stream."""
        return "streamed"

    @property
    def site_name(self) -> str:
        """Return the display name stamped on every extracted stream."""
        return "Streamed"

    async def extract(self) -> list[ExtractedStream]:
        """Fetch motorsport events and resolve embed URLs for each source.

        Returns:
            One ``ExtractedStream`` (``stream_type="embed"``) per stream
            entry that carries an embed URL. An empty list is returned when
            the events endpoint does not answer with HTTP 200 or with a JSON
            list. If an unexpected error interrupts the run, whatever was
            collected up to that point is returned.
        """
        streams: list[ExtractedStream] = []

        try:
            async with httpx.AsyncClient(
                timeout=15.0,
                follow_redirects=True,
                headers={"User-Agent": USER_AGENT, "Accept": "application/json"},
            ) as client:
                # Get motorsport events
                resp = await client.get(f"{BASE_URL}/api/matches/motor-sports")
                if resp.status_code != 200:
                    logger.warning(
                        "[streamed] Events API returned HTTP %d", resp.status_code
                    )
                    return []

                events = resp.json()
                if not isinstance(events, list):
                    logger.warning("[streamed] Unexpected events response type")
                    return []

                logger.info("[streamed] Found %d motorsport event(s)", len(events))

                for event in events:
                    # A non-dict entry has no .get(); skip it instead of
                    # letting AttributeError abort every remaining event.
                    if not isinstance(event, dict):
                        continue

                    # `or` also covers an explicit null/empty title, which
                    # would otherwise be rendered as the text "None".
                    title = event.get("title") or "Unknown Event"
                    sources = event.get("sources")
                    if not isinstance(sources, list) or not sources:
                        continue

                    for source_info in sources:
                        streams.extend(
                            await self._fetch_source_streams(
                                client, title, source_info
                            )
                        )

        except Exception:
            logger.exception("[streamed] Failed to fetch events")

        logger.info("[streamed] Extracted %d stream(s)", len(streams))
        return streams

    async def _fetch_source_streams(
        self, client: httpx.AsyncClient, title: str, source_info: object
    ) -> list[ExtractedStream]:
        """Resolve one event source into its embed streams.

        Args:
            client: Open HTTP client used for the stream lookup.
            title: Title of the event the source belongs to.
            source_info: One entry of the event's ``sources`` list; expected
                to be a dict with ``source`` and ``id`` keys.

        Returns:
            The streams found for this source; empty when the entry is
            malformed, the lookup fails, or no item has an embed URL.
        """
        if not isinstance(source_info, dict):
            return []

        source_name = source_info.get("source", "")
        source_id = source_info.get("id", "")
        if not source_name or not source_id:
            return []

        try:
            stream_resp = await client.get(
                f"{BASE_URL}/api/stream/{source_name}/{source_id}"
            )
            if stream_resp.status_code != 200:
                return []
            stream_data = stream_resp.json()
        except Exception:
            # Deliberately broad: a failing source is logged and skipped so
            # the remaining sources are still processed.
            logger.debug(
                "[streamed] Failed to fetch stream for %s/%s",
                source_name,
                source_id,
                exc_info=True,
            )
            return []

        # The endpoint may answer with a single object instead of a list.
        if not isinstance(stream_data, list):
            stream_data = [stream_data]

        found: list[ExtractedStream] = []
        for item in stream_data:
            if not isinstance(item, dict):
                continue

            embed_url = item.get("embedUrl", "")
            if not embed_url:
                continue

            try:
                found.append(
                    ExtractedStream(
                        url=embed_url,
                        site_key=self.site_key,
                        site_name=self.site_name,
                        quality="HD" if item.get("hd", False) else "SD",
                        title=self._stream_title(title, item),
                        stream_type="embed",
                        embed_url=embed_url,
                    )
                )
            except Exception:
                # Guard each item separately so one entry the model cannot
                # be built from does not drop its siblings.
                logger.debug(
                    "[streamed] Skipping malformed stream entry for %s/%s",
                    source_name,
                    source_id,
                    exc_info=True,
                )

        return found

    @staticmethod
    def _stream_title(title: str, item: dict) -> str:
        """Build the display title for one stream entry.

        Appends `` (<language>)`` when the item names a language and
        `` #<n>`` when its stream number is greater than 1.
        """
        label = f"{title}"

        language = item.get("language", "")
        if language:
            label += f" ({language})"

        # streamNo may be missing, null, or a numeric string; anything that
        # cannot be read as an integer is treated as the first stream.
        try:
            stream_no = int(item.get("streamNo") or 1)
        except (TypeError, ValueError):
            stream_no = 1
        if stream_no > 1:
            label += f" #{stream_no}"

        return label
f1-stream: add real F1 stream extractors and iframe player support Add three new extractors (Streamed.pk, DaddyLive, Aceztrims) for live F1 streams. Extend ExtractedStream model with stream_type/embed_url fields, skip health checks for embed streams, fix broken Akamai demo stream, add variant playlist validation, and add iframe player support in the frontend for embed-type streams. 2026-03-01 14:35:19 +00:00			`"""Streamed.pk extractor - fetches F1/motorsport streams via public JSON API."""`

			`import logging`

			`import httpx`

			`from backend.extractors.base import BaseExtractor`
			`from backend.extractors.models import ExtractedStream`

			`logger = logging.getLogger(__name__)`

f1-stream: add chrome-browser, subreddit, dd12 extractors; fix streamed.pk User asked to broaden the source pipeline so f1-stream can find F1 (and adjacent motorsport) streams from Sky Sports / DAZN / Reddit / etc., using the in-cluster chrome-service headed browser where needed. Four changes: 1. streamed.py: BASE_URL streamed.su → streamed.pk. The .su domain stopped serving the API host in 2026 (only the marketing page is left); .pk hosts the JSON API now. Adds 3 events/round (currently all routed through embedsports.top — see #2 caveat). 2. chrome_browser.py (new): generic chrome-service-driven extractor. Connects to the existing chrome-service WS (CHROME_WS_URL + CHROME_WS_TOKEN env), navigates a list of TARGETS, captures any HLS playlist URL the page fetches at runtime, returns one ExtractedStream per discovery. Uses the same stealth init script as the verifier so anti-bot checks don't trip the page. Handles iframes (DD12-style /nas → /new-nas/jwplayer) and probes child-frame <video>/source elements after settle. Caveat: most aggregator sites (pooembed, embedsports, hmembeds, even DD12's JW Player path) use a broken runtime decoder that produces no m3u8 in our environment, so the TARGETS list is currently 0-yielding; the framework is the contribution and concrete sites can be added as they're discovered. 3. subreddit.py (new): scans r/MotorsportsReplays, r/motorsports, r/formula1, r/motogp via the public old.reddit.com JSON API for posts whose flair/title indicates a live stream. Discovered URLs are returned as embed-type streams; the verifier visits each via chrome-service to confirm playability. Note: Reddit currently HTTP 403's our cluster outbound IP for anonymous JSON requests; the extractor returns 0 in that state and logs a debug message. Will work from any IP Reddit isn't blocking. 4. dd12.py (new): inline-HTML scraper for DD12Streams. The site embeds `playerInstance.setup({file: "..."})` directly in HTML — no JS decoder needed. 
Currently surfaces NASCAR Cup Series 24/7 (clean BunnyCDN-hosted HLS at w9329432hnf3h34.b-cdn.net/pdfs/master.m3u8); add new `(path, label, title)` tuples to CHANNELS as DD12 expands. Result: /streams now shows 2 verified live streams (Rally TV via pitsport + DD12 NASCAR Cup 24/7). When the next F1 weekend (Canadian GP, May 22-24) goes live, pitsport will surface F1 sessions automatically via the existing pushembdz path. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> 2026-05-07 16:05:25 +00:00			`# Site renamed from streamed.su → streamed.pk in 2026; the .su domain`
			`# stopped resolving the API host (only the marketing page is left).`
			`BASE_URL = "https://streamed.pk"`
f1-stream: add real F1 stream extractors and iframe player support Add three new extractors (Streamed.pk, DaddyLive, Aceztrims) for live F1 streams. Extend ExtractedStream model with stream_type/embed_url fields, skip health checks for embed streams, fix broken Akamai demo stream, add variant playlist validation, and add iframe player support in the frontend for embed-type streams. 2026-03-01 14:35:19 +00:00			`USER_AGENT = (`
			`"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "`
			`"AppleWebKit/537.36 (KHTML, like Gecko) "`
			`"Chrome/120.0.0.0 Safari/537.36"`
			`)`


			`class StreamedExtractor(BaseExtractor):`
			`"""Extracts streams from Streamed.pk's public JSON API.`

			`Uses two endpoints:`
			`- GET /api/matches/motor-sports → list of events with sources`
			`- GET /api/stream/{source}/{id} → embed URL for a specific source`
			`"""`

			`@property`
			`def site_key(self) -> str:`
			`return "streamed"`

			`@property`
			`def site_name(self) -> str:`
			`return "Streamed"`

			`async def extract(self) -> list[ExtractedStream]:`
			`"""Fetch motorsport events and resolve embed URLs for each source."""`
			`streams: list[ExtractedStream] = []`

			`try:`
			`async with httpx.AsyncClient(`
			`timeout=15.0,`
			`follow_redirects=True,`
			`headers={"User-Agent": USER_AGENT, "Accept": "application/json"},`
			`) as client:`
			`# Get motorsport events`
			`resp = await client.get(f"{BASE_URL}/api/matches/motor-sports")`
			`if resp.status_code != 200:`
			`logger.warning(`
			`"[streamed] Events API returned HTTP %d", resp.status_code`
			`)`
			`return []`

			`events = resp.json()`
			`if not isinstance(events, list):`
			`logger.warning("[streamed] Unexpected events response type")`
			`return []`

			`logger.info("[streamed] Found %d motorsport event(s)", len(events))`

			`for event in events:`
			`title = event.get("title", "Unknown Event")`
			`sources = event.get("sources", [])`
			`if not sources:`
			`continue`

			`for source_info in sources:`
			`source_name = source_info.get("source", "")`
			`source_id = source_info.get("id", "")`
			`if not source_name or not source_id:`
			`continue`

			`try:`
			`stream_resp = await client.get(`
			`f"{BASE_URL}/api/stream/{source_name}/{source_id}"`
			`)`
			`if stream_resp.status_code != 200:`
			`continue`

			`stream_data = stream_resp.json()`
			`if not isinstance(stream_data, list):`
			`stream_data = [stream_data]`

			`for item in stream_data:`
			`embed_url = item.get("embedUrl", "")`
			`if not embed_url:`
			`continue`

			`language = item.get("language", "")`
			`hd = item.get("hd", False)`
			`stream_no = item.get("streamNo", 1)`

			`quality = "HD" if hd else "SD"`
			`stream_title = f"{title}"`
			`if language:`
			`stream_title += f" ({language})"`
			`if stream_no > 1:`
			`stream_title += f" #{stream_no}"`

			`streams.append(`
			`ExtractedStream(`
			`url=embed_url,`
			`site_key=self.site_key,`
			`site_name=self.site_name,`
			`quality=quality,`
			`title=stream_title,`
			`stream_type="embed",`
			`embed_url=embed_url,`
			`)`
			`)`
			`except Exception:`
			`logger.debug(`
			`"[streamed] Failed to fetch stream for %s/%s",`
			`source_name,`
			`source_id,`
			`exc_info=True,`
			`)`

			`except Exception:`
			`logger.exception("[streamed] Failed to fetch events")`

			`logger.info("[streamed] Extracted %d stream(s)", len(streams))`
			`return streams`