Restore f1-stream stack — undo accidental bundling into 63fe7d2b

Commit 63fe7d2b (fan-control) was made with a bare `git commit` in the shared infra working tree and inadvertently swept in a parallel session's staged f1-stream-extraction work (the main.tf repoint, the removal of ~48 files under `files/`, the ci-cd.md and .claude doc changes, and two extraction plan docs). This commit returns every f1-stream-related path to its pre-63fe7d2b state (3493c347) so that the extraction can be committed cleanly by its own session. The fan-control files added in 63fe7d2b are untouched.

[ci skip]

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 21:47:20 +00:00 · 2026-06-04 21:47:20 +00:00 · 147a8cff40
commit 147a8cff40
parent 90ad6b9125
54 changed files with 9563 additions and 163 deletions
--- a/stacks/f1-stream/files/backend/extractors/aceztrims.py
+++ b/stacks/f1-stream/files/backend/extractors/aceztrims.py
@ -0,0 +1,122 @@
+"""Aceztrims extractor — scrapes embed URLs from acestrlms.pages.dev/f11/.
+
+The page (Cloudflare Pages, no anti-bot) hosts an iframe + a strip of
+onclick channel-switcher buttons. Each button rewrites the iframe via
+`document.getElementById('iframe').src = '<embed_url>'`. The initial
+channel is hard-coded as `<iframe id='iframe' src='...'>`.
+
+We strip HTML comments first because the page keeps ~20 legacy channel
+buttons inside `<!-- ... -->` blocks for easy re-enablement; the previous
+loose regex picked them up as false positives.
+
+All channels are iframe embeds (no direct m3u8) — `stream_type='embed'`.
+
+Site naming note: the extractor key stays `aceztrims` (the previous
+domain) so registry/cache identifiers don't churn. The current domain
+is `acestrlms.pages.dev` and the F1 path is `/f11/` (two ones — `/f1/`
+is the cross-sport schedule page and has no stream buttons).
+"""
+
+import logging
+import re
+
+import httpx
+
+from backend.extractors.base import BaseExtractor
+from backend.extractors.models import ExtractedStream
+
+logger = logging.getLogger(__name__)
+
+# Origin of the scraped site; page paths below are appended to it verbatim.
+BASE_URL = "https://acestrlms.pages.dev"
+# (path, category) pairs to scrape. The category is used to build the
+# stream title.
+F1_PAGES = [
+    ("/f11/", "Formula 1"),
+]
+
+# Browser-like User-Agent sent with every request.
+USER_AGENT = (
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) "
+    "Chrome/120.0.0.0 Safari/537.36"
+)
+
+# `document.getElementById('iframe').src = '<URL>'` — current channel-switcher format.
+# Group 1 is the URL; either quote style is accepted.
+_ONCLICK_IFRAME_SRC = re.compile(
+    r"""document\.getElementById\(['"]iframe['"]\)\.src\s*=\s*['"]([^'"]+)['"]""",
+    re.IGNORECASE,
+)
+# `<iframe id='iframe' src='<URL>'>` — the default/initial channel.
+# The lookahead only asserts that `id='iframe'` appears somewhere inside the
+# same tag, so the match no longer depends on `id` being written before
+# `src`; `<iframe src='<URL>' id='iframe'>` is captured as well. Group 1 is
+# the URL.
+_DEFAULT_IFRAME = re.compile(
+    r"""<iframe(?=[^>]*id\s*=\s*['"]iframe['"])[^>]*src\s*=\s*['"]([^'"]+)['"]""",
+    re.IGNORECASE,
+)
+# Non-greedy and DOTALL so each (possibly multi-line) comment is matched on
+# its own rather than everything between the first `<!--` and the last `-->`.
+_HTML_COMMENT = re.compile(r"<!--.*?-->", re.DOTALL)
+
+
+class AceztrimsExtractor(BaseExtractor):
+    """Extractor for the iframe embeds published on acestrlms.pages.dev.
+
+    Each page listed in ``F1_PAGES`` is fetched once per ``extract()`` call.
+    Every distinct embed URL found in the page's non-commented markup is
+    returned as an ``ExtractedStream`` with ``stream_type="embed"``.
+    """
+
+    @property
+    def site_key(self) -> str:
+        """Identifier attached to every stream produced by this extractor."""
+        return "aceztrims"
+
+    @property
+    def site_name(self) -> str:
+        """Display name attached to every stream produced by this extractor."""
+        return "Aceztrims"
+
+    async def extract(self) -> list[ExtractedStream]:
+        """Scrape every configured page and return the combined streams.
+
+        A failure on one page is logged with its traceback and does not
+        stop the remaining pages from being scraped.
+
+        Returns:
+            The streams from all pages, in ``F1_PAGES`` order.
+        """
+        collected: list[ExtractedStream] = []
+        request_headers = {"User-Agent": USER_AGENT}
+
+        async with httpx.AsyncClient(
+            headers=request_headers,
+            follow_redirects=True,
+            timeout=15.0,
+        ) as client:
+            for path, category in F1_PAGES:
+                try:
+                    collected += await self._scrape_page(client, path, category)
+                except Exception:
+                    # Best-effort: keep whatever the other pages yielded.
+                    logger.exception("[aceztrims] Failed to scrape %s", path)
+
+        logger.info("[aceztrims] Extracted %d stream(s)", len(collected))
+        return collected
+
+    async def _scrape_page(
+        self, client: httpx.AsyncClient, path: str, category: str
+    ) -> list[ExtractedStream]:
+        """Fetch one page and return a stream per unique embed URL on it.
+
+        Args:
+            client: Open HTTP client used for the request.
+            path: Page path appended to ``BASE_URL``.
+            category: Label used to build each stream's title.
+
+        Returns:
+            Streams in first-seen order (default iframe first, then the
+            onclick channel buttons); an empty list when the response
+            status is not 200.
+        """
+        response = await client.get(f"{BASE_URL}{path}")
+        if response.status_code != 200:
+            logger.warning(
+                "[aceztrims] %s returned HTTP %d", path, response.status_code
+            )
+            return []
+
+        # Legacy channel buttons are parked inside `<!-- ... -->` blocks for
+        # quick re-enablement; remove comments so only live markup is matched.
+        live_markup = _HTML_COMMENT.sub("", response.text)
+
+        candidates = (
+            found.group(1).strip()
+            for regex in (_DEFAULT_IFRAME, _ONCLICK_IFRAME_SRC)
+            for found in regex.finditer(live_markup)
+        )
+        # dict.fromkeys drops repeats while keeping first-seen order; URLs
+        # that are empty after stripping are discarded.
+        embed_urls = [link for link in dict.fromkeys(candidates) if link]
+
+        results = [
+            ExtractedStream(
+                url=link,
+                site_key=self.site_key,
+                site_name=self.site_name,
+                quality="",
+                title=f"{category} Stream",
+                stream_type="embed",
+                embed_url=link,
+            )
+            for link in embed_urls
+        ]
+
+        logger.info(
+            "[aceztrims] Found %d stream(s) on %s", len(results), path
+        )
+        return results