Restore f1-stream stack — undo accidental bundling into 63fe7d2b
Commit 63fe7d2b (fan-control) was made with a bare `git commit` in the shared infra working tree and inadvertently swept in a parallel session's staged f1-stream-extraction work (main.tf repoint, ~48 files/ removals, ci-cd.md + .claude docs, two extraction plan docs). This returns every f1-stream-related path to its pre-63fe7d2b state (3493c347) so that extraction can be committed cleanly by its own session. The fan-control files added in 63fe7d2b are untouched. [ci skip] Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
90ad6b9125
commit
147a8cff40
54 changed files with 9563 additions and 163 deletions
122
stacks/f1-stream/files/backend/extractors/aceztrims.py
Normal file
122
stacks/f1-stream/files/backend/extractors/aceztrims.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
"""Aceztrims extractor — scrapes embed URLs from acestrlms.pages.dev/f11/.
|
||||
|
||||
The page (Cloudflare Pages, no anti-bot) hosts an iframe + a strip of
|
||||
onclick channel-switcher buttons. Each button rewrites the iframe via
|
||||
`document.getElementById('iframe').src = '<embed_url>'`. The initial
|
||||
channel is hard-coded as `<iframe id='iframe' src='...'>`.
|
||||
|
||||
We strip HTML comments first because the page keeps ~20 legacy channel
|
||||
buttons inside `<!-- ... -->` blocks for easy re-enablement; the previous
|
||||
loose regex picked them up as false positives.
|
||||
|
||||
All channels are iframe embeds (no direct m3u8) — `stream_type='embed'`.
|
||||
|
||||
Site naming note: the extractor key stays `aceztrims` (the previous
|
||||
domain) so registry/cache identifiers don't churn. The current domain
|
||||
is `acestrlms.pages.dev` and the F1 path is `/f11/` (two ones — `/f1/`
|
||||
is the cross-sport schedule page and has no stream buttons).
|
||||
"""
|
||||
|
||||
import logging
import re
from urllib.parse import urljoin, urlsplit

import httpx

from backend.extractors.base import BaseExtractor
from backend.extractors.models import ExtractedStream
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
BASE_URL = "https://acestrlms.pages.dev"
|
||||
F1_PAGES = [
|
||||
("/f11/", "Formula 1"),
|
||||
]
|
||||
|
||||
USER_AGENT = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
# `document.getElementById('iframe').src = '<URL>'` — current channel-switcher format.
|
||||
_ONCLICK_IFRAME_SRC = re.compile(
|
||||
r"""document\.getElementById\(['"]iframe['"]\)\.src\s*=\s*['"]([^'"]+)['"]""",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
# `<iframe id='iframe' src='<URL>'>` — the default/initial channel.
|
||||
_DEFAULT_IFRAME = re.compile(
|
||||
r"""<iframe[^>]*id\s*=\s*['"]iframe['"][^>]*src\s*=\s*['"]([^'"]+)['"]""",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_HTML_COMMENT = re.compile(r"<!--.*?-->", re.DOTALL)
|
||||
|
||||
|
||||
class AceztrimsExtractor(BaseExtractor):
    """Pulls iframe embed URLs out of the acestrlms.pages.dev F1 page.

    Every stream produced is an iframe embed (`stream_type="embed"`); the
    same URL is stored in both `url` and `embed_url`.
    """

    @property
    def site_key(self) -> str:
        """Extractor key; kept as `aceztrims` so existing identifiers stay stable."""
        return "aceztrims"

    @property
    def site_name(self) -> str:
        """Human-readable site label attached to each stream."""
        return "Aceztrims"

    async def extract(self) -> list[ExtractedStream]:
        """Scrape every page in `F1_PAGES` and return the streams found.

        Scraping is best-effort: a failure on one page is logged with its
        traceback and the remaining pages are still processed.

        Returns:
            All streams collected across the configured pages (possibly empty).
        """
        streams: list[ExtractedStream] = []

        async with httpx.AsyncClient(
            timeout=15.0,
            follow_redirects=True,
            headers={"User-Agent": USER_AGENT},
        ) as client:
            for path, category in F1_PAGES:
                try:
                    streams.extend(await self._scrape_page(client, path, category))
                except Exception:
                    # Deliberate boundary: one broken page must not abort
                    # the whole extraction run.
                    logger.exception("[aceztrims] Failed to scrape %s", path)

        logger.info("[aceztrims] Extracted %d stream(s)", len(streams))
        return streams

    async def _scrape_page(
        self, client: httpx.AsyncClient, path: str, category: str
    ) -> list[ExtractedStream]:
        """Fetch one page and turn its live channel embeds into streams.

        Args:
            client: Shared HTTP client used for the request.
            path: Site-relative path appended to `BASE_URL`.
            category: Label used to build each stream's title.

        Returns:
            De-duplicated streams in discovery order (default iframe first,
            then onclick channel buttons). Empty when the page does not
            answer with HTTP 200.
        """
        url = f"{BASE_URL}{path}"
        resp = await client.get(url)
        if resp.status_code != 200:
            logger.warning(
                "[aceztrims] %s returned HTTP %d", path, resp.status_code
            )
            return []

        # The page keeps a block of legacy channel buttons inside
        # `<!-- ... -->` for quick re-enablement. Strip comments first so
        # the regexes only see live buttons.
        html = _HTML_COMMENT.sub("", resp.text)

        # Redirects are followed, so resolve relative references against the
        # URL that actually served the document.
        page_url = str(resp.url)

        seen: set[str] = set()
        streams: list[ExtractedStream] = []

        for pattern in (_DEFAULT_IFRAME, _ONCLICK_IFRAME_SRC):
            for match in pattern.finditer(html):
                raw_url = match.group(1).strip()
                if not raw_url:
                    continue

                try:
                    # Turns `//host/x` and `/x` style references into
                    # absolute URLs; absolute URLs pass through unchanged.
                    embed_url = urljoin(page_url, raw_url)
                    scheme = urlsplit(embed_url).scheme.lower()
                except ValueError:
                    # Malformed URL (e.g. broken IPv6 literal): skip just
                    # this candidate rather than failing the whole page.
                    logger.debug("[aceztrims] Skipping malformed URL %r", raw_url)
                    continue

                # Placeholders such as `about:blank` or `javascript:` are
                # not playable embeds.
                if scheme not in ("http", "https"):
                    continue

                if embed_url in seen:
                    continue
                seen.add(embed_url)

                streams.append(
                    ExtractedStream(
                        url=embed_url,
                        site_key=self.site_key,
                        site_name=self.site_name,
                        quality="",
                        title=f"{category} Stream",
                        stream_type="embed",
                        embed_url=embed_url,
                    )
                )

        logger.info(
            "[aceztrims] Found %d stream(s) on %s", len(streams), path
        )
        return streams
|
||||
Loading…
Add table
Add a link
Reference in a new issue