infra/stacks/f1-stream/files/backend/extractors/hmembeds.py

132 lines
4.7 KiB
Python
Raw Normal View History

"""hmembeds.one decoder + extractor.
Reverse-engineered 2026-05-07 (4-agent parallel session). The hmembeds
embed page contains an inline `<script>` block of the form:
var k = "<16-char ASCII key>";
var b = atob("<URI-encoded XOR-encrypted blob>");
var c = decodeURIComponent(escape(b));
var d = "";
for (var i = 0; i < c.length; i++)
d += String.fromCharCode(c.charCodeAt(i) ^ k.charCodeAt(i % k.length));
(new Function(d))();
The decoded `d` is plain JavaScript that calls `jwplayer('player').setup({
file: <m3u8_url>, ... })`. The `<m3u8_url>` is a JWT-bound URL on
`amsterdam-0183.zulo-0084.online/sec/<JWT>/<embed_id>.m3u8` where the
JWT pins the request to a /24 of the requestor's IP.
So: pure client-side decoding. No fingerprint check, no canvas hash, no
browser-derived input. We can produce the m3u8 URL with curl + Python
faster than launching Chromium.
**Caveat (2026-05-07 reality)**: the hmembeds backend issues JWT URLs
for the curated `888520f3...` (Sky Sports F1 24/7) and `fc3a5463...`
(DAZN F1 24/7) embeds, but the origin (`amsterdam-0183.zulo-0084.online`)
returns 404/403 on the m3u8 fetch from any IP we tested (cluster IPv4
176.12.22.x, dev VM IPv6 2001:470:6f:43d::). Both legacy embed IDs
appear to be offline upstream. This extractor will produce JWT URLs
that the verifier marks unplayable for those specific embeds; if the
upstream broadcasts come back online or fresh IDs are added, the same
extractor logic just works.
"""
import base64
import logging
import re
import urllib.parse
import httpx
from backend.extractors.base import BaseExtractor
from backend.extractors.models import ExtractedStream
logger = logging.getLogger(__name__)
USER_AGENT = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/605.1.15 (KHTML, like Gecko) "
"Version/17.4 Safari/605.1.15"
)
# Curated hmembeds embed IDs that the community treats as 24/7 channels.
# `_CHANNELS` mirrors the legacy `CuratedExtractor` list — keeping them
# here means the resolver can attempt offline-decoded JWT URLs and the
# verifier filters out the ones that are upstream-offline.
_CHANNELS = (
("888520f36cd94c5da4c71fddc1a5fc9b", "Sky Sports F1 (24/7) — hmembeds"),
("fc3a54634d0867b0c02ee3223292e7c6", "DAZN F1 (24/7) — hmembeds"),
)
_KEY_RE = re.compile(r'k\s*=\s*"([a-z0-9]+)"')
_BLOB_RE = re.compile(r'b\s*=\s*atob\("([^"]+)"\)')
_URL_RE = re.compile(r'streamUrl\s*=\s*"([^"]+)"')
def decode_embed(html: str) -> str | None:
"""Pull the m3u8 URL out of an hmembeds embed HTML.
Returns the JWT-bound m3u8 URL the page would tell JW Player to
play, or None if the page doesn't match the expected shape.
"""
km = _KEY_RE.search(html)
bm = _BLOB_RE.search(html)
if not km or not bm:
return None
key = km.group(1)
blob = bm.group(1)
try:
# b = atob(blob) — base64-decode bytes
# c = decodeURIComponent(escape(b)) — Latin-1 → UTF-8 round-trip
# d[i] = c[i] ^ k[i % len(k)] — XOR with rotating key
raw = base64.b64decode(blob).decode("latin-1")
deuri = urllib.parse.unquote(raw)
decoded = "".join(
chr(ord(c) ^ ord(key[i % len(key)])) for i, c in enumerate(deuri)
)
except Exception:
return None
m = _URL_RE.search(decoded)
return m.group(1) if m else None
class HmembedsExtractor(BaseExtractor):
@property
def site_key(self) -> str:
return "hmembeds"
@property
def site_name(self) -> str:
return "hmembeds.one"
async def extract(self) -> list[ExtractedStream]:
results: list[ExtractedStream] = []
async with httpx.AsyncClient(
timeout=15.0,
follow_redirects=True,
headers={"User-Agent": USER_AGENT, "Referer": "https://hmembeds.one/"},
) as client:
for embed_id, label in _CHANNELS:
try:
page = await client.get(f"https://hmembeds.one/embed/{embed_id}")
except Exception:
logger.debug("[hmembeds] embed %s fetch failed", embed_id, exc_info=True)
continue
if page.status_code != 200:
continue
m3u8 = decode_embed(page.text)
if not m3u8:
continue
results.append(
ExtractedStream(
url=m3u8,
site_key=self.site_key,
site_name=self.site_name,
quality="",
title=label,
stream_type="m3u8",
)
)
logger.info("[hmembeds] resolved %d JWT URL(s) (verifier filters dead origins)", len(results))
return results