infra/stacks/f1-stream/files/backend/extractors/aceztrims.py
Viktor Barzin 51b8081594 f1-stream: add real F1 stream extractors and iframe player support
Add three new extractors (Streamed.pk, DaddyLive, Aceztrims) for live
F1 streams. Extend ExtractedStream model with stream_type/embed_url
fields, skip health checks for embed streams, fix broken Akamai demo
stream, add variant playlist validation, and add iframe player support
in the frontend for embed-type streams.
2026-03-01 14:35:19 +00:00

154 lines
4.7 KiB
Python

"""Aceztrims extractor - scrapes F1 streaming links from Aceztrims pages.
Parses HTML for iframe button onclick handlers and extracts streams from:
- /iframe1?s=<m3u8_url> → direct m3u8
- https://pooembed.eu/embed/... → embed URL
"""
import logging
import re
from urllib.parse import parse_qs, unquote, urlparse

import httpx

from backend.extractors.base import BaseExtractor
from backend.extractors.models import ExtractedStream
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Site root; each page path below is appended to it to build the request URL.
BASE_URL = "https://acestrlms.pages.dev"

# Pages to scrape, as (path, category) pairs. The category is used as the
# prefix of the titles given to streams found on that page.
F1_PAGES = [("/f1/", "Formula 1")]

# User-Agent header sent with every request made by the extractor.
USER_AGENT = " ".join(
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/120.0.0.0 Safari/537.36",
    )
)
class AceztrimsExtractor(BaseExtractor):
    """Extract streams from Aceztrims pages by scanning the raw HTML.

    Each page listed in ``F1_PAGES`` is fetched and searched with three
    regular expressions, applied in this order:

    1. quoted ``/iframe1?s=<m3u8_url>`` links -> ``m3u8`` streams,
    2. quoted embed URLs (``pooembed.eu`` or any URL containing
       ``embed``) -> ``embed`` streams,
    3. ``.m3u8`` URLs appearing after an ``onclick=`` attribute ->
       ``m3u8`` streams.

    A URL is reported at most once per page, whichever pattern found it
    first.
    """

    # Compiled once at class-definition time instead of on every scrape.

    # Pattern 1: quoted "/iframe1?s=<value>", relative or absolute.
    # Group 1 is the whole link, group 2 the raw value of ``s``.
    _IFRAME1_RE = re.compile(
        r"""['"]((?:https?://[^'"]*)?/iframe1\?s=([^'"&]+))['"]""",
        re.IGNORECASE,
    )

    # Pattern 2: quoted embed URLs (pooembed.eu or similar).
    _EMBED_RE = re.compile(
        r"""['"]((https?://(?:pooembed\.eu|[^'"]*embed)[^'"]*))['"]""",
        re.IGNORECASE,
    )

    # Pattern 3: generic onclick handlers that mention an .m3u8 URL.
    _ONCLICK_RE = re.compile(
        r"""onclick\s*=\s*['"].*?['"]?(https?://[^'")\s]+\.m3u8[^'")\s]*)['"]?""",
        re.IGNORECASE,
    )

    @property
    def site_key(self) -> str:
        """Identifier stored in the ``site_key`` field of every stream."""
        return "aceztrims"

    @property
    def site_name(self) -> str:
        """Name stored in the ``site_name`` field of every stream."""
        return "Aceztrims"

    async def extract(self) -> list[ExtractedStream]:
        """Scrape all configured F1 pages for stream URLs.

        Returns:
            The streams found on every page in ``F1_PAGES``, in page order.
            A page whose scrape raises is logged and skipped, so a single
            failing page does not discard results from the others.
        """
        streams: list[ExtractedStream] = []
        async with httpx.AsyncClient(
            timeout=15.0,
            follow_redirects=True,
            headers={"User-Agent": USER_AGENT},
        ) as client:
            for path, category in F1_PAGES:
                try:
                    streams.extend(
                        await self._scrape_page(client, path, category)
                    )
                except Exception:
                    # Deliberate best-effort boundary: log with traceback
                    # and carry on with the remaining pages.
                    logger.exception(
                        "[aceztrims] Failed to scrape page %s", path
                    )
        logger.info("[aceztrims] Extracted %d stream(s)", len(streams))
        return streams

    async def _scrape_page(
        self, client: httpx.AsyncClient, path: str, category: str
    ) -> list[ExtractedStream]:
        """Fetch a single page and return the streams found in its HTML.

        Args:
            client: HTTP client used to issue the GET request.
            path: Page path, appended to ``BASE_URL``.
            category: Prefix used for the titles of the streams found.

        Returns:
            The streams found on the page, or an empty list when the
            response status is not 200.
        """
        resp = await client.get(f"{BASE_URL}{path}")
        if resp.status_code != 200:
            logger.warning(
                "[aceztrims] Page %s returned HTTP %d", path, resp.status_code
            )
            return []

        streams = self._parse_streams(resp.text, category)
        logger.info(
            "[aceztrims] Found %d stream(s) on %s", len(streams), path
        )
        return streams

    def _parse_streams(
        self, html: str, category: str
    ) -> list[ExtractedStream]:
        """Run the three patterns over ``html`` and build the stream list.

        Performs no I/O. URLs are de-duplicated across all three patterns,
        and results keep the pattern order (iframe1, embed, onclick).
        """
        streams: list[ExtractedStream] = []
        seen_urls: set[str] = set()

        def add_m3u8(stream_url: str) -> None:
            # Record a direct playlist URL unless it was already reported.
            if stream_url in seen_urls:
                return
            seen_urls.add(stream_url)
            streams.append(
                ExtractedStream(
                    url=stream_url,
                    site_key=self.site_key,
                    site_name=self.site_name,
                    quality="",
                    title=f"{category} Stream",
                    stream_type="m3u8",
                )
            )

        def add_embed(page_url: str) -> None:
            # Record an embed page URL unless it was already reported.
            if page_url in seen_urls:
                return
            seen_urls.add(page_url)
            streams.append(
                ExtractedStream(
                    url=page_url,
                    site_key=self.site_key,
                    site_name=self.site_name,
                    quality="",
                    title=f"{category} Stream (Embed)",
                    stream_type="embed",
                    embed_url=page_url,
                )
            )

        # Pattern 1: /iframe1?s=<m3u8_url> - direct m3u8.
        for match in self._IFRAME1_RE.finditer(html):
            add_m3u8(self._decode_stream_param(match.group(2)))

        # Pattern 2: embed URLs (pooembed.eu or similar).
        for match in self._EMBED_RE.finditer(html):
            add_embed(match.group(1))

        # Pattern 3: generic onclick handlers with .m3u8 URLs.
        for match in self._ONCLICK_RE.finditer(html):
            candidate = match.group(1)
            # An absolute wrapper link such as
            # "https://host/iframe1?s=https://x/a.m3u8" also satisfies this
            # pattern, but it addresses the iframe page rather than a
            # playlist; pattern 1 is responsible for unwrapping those.
            if "/iframe1?s=" in candidate.lower():
                continue
            add_m3u8(candidate)

        return streams

    @staticmethod
    def _decode_stream_param(raw: str) -> str:
        """Return the stream URL carried by an ``/iframe1?s=`` link.

        A value that is already a literal ``http(s)://`` URL is returned
        unchanged. Anything else is treated as percent-encoded (for
        example ``https%3A%2F%2Fhost%2Fa.m3u8``) and decoded, so the
        result can be requested directly.
        """
        if raw.lower().startswith(("http://", "https://")):
            return raw
        return unquote(raw)