Commit 63fe7d2b (fan-control) was made with a bare `git commit` in the shared infra working tree and inadvertently swept in a parallel session's staged f1-stream-extraction work (main.tf repoint, ~48 files/ removals, ci-cd.md + .claude docs, two extraction plan docs). This returns every f1-stream-related path to its pre-63fe7d2b state (3493c347) so that extraction can be committed cleanly by its own session. The fan-control files added in 63fe7d2b are untouched. [ci skip] Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
131 lines
4.7 KiB
Python
131 lines
4.7 KiB
Python
"""hmembeds.one decoder + extractor.
|
|
|
|
Reverse-engineered 2026-05-07 (4-agent parallel session). The hmembeds
|
|
embed page contains an inline `<script>` block of the form:
|
|
|
|
var k = "<16-char ASCII key>";
|
|
var b = atob("<URI-encoded XOR-encrypted blob>");
|
|
var c = decodeURIComponent(escape(b));
|
|
var d = "";
|
|
for (var i = 0; i < c.length; i++)
|
|
d += String.fromCharCode(c.charCodeAt(i) ^ k.charCodeAt(i % k.length));
|
|
(new Function(d))();
|
|
|
|
The decoded `d` is plain JavaScript that calls `jwplayer('player').setup({
|
|
file: <m3u8_url>, ... })`. The `<m3u8_url>` is a JWT-bound URL on
|
|
`amsterdam-0183.zulo-0084.online/sec/<JWT>/<embed_id>.m3u8` where the
|
|
JWT pins the request to a /24 of the requestor's IP.
|
|
|
|
So: pure client-side decoding. No fingerprint check, no canvas hash, no
|
|
browser-derived input. We can produce the m3u8 URL with curl + Python
|
|
faster than launching Chromium.
|
|
|
|
**Caveat (2026-05-07 reality)**: the hmembeds backend issues JWT URLs
|
|
for the curated `888520f3...` (Sky Sports F1 24/7) and `fc3a5463...`
|
|
(DAZN F1 24/7) embeds, but the origin (`amsterdam-0183.zulo-0084.online`)
|
|
returns 404/403 on the m3u8 fetch from any IP we tested (cluster IPv4
|
|
176.12.22.x, dev VM IPv6 2001:470:6f:43d::). Both legacy embed IDs
|
|
appear to be offline upstream. This extractor will produce JWT URLs
|
|
that the verifier marks unplayable for those specific embeds; if the
|
|
upstream broadcasts come back online or fresh IDs are added, the same
|
|
extractor logic just works.
|
|
"""
|
|
|
|
import base64
|
|
import logging
|
|
import re
|
|
import urllib.parse
|
|
|
|
import httpx
|
|
|
|
from backend.extractors.base import BaseExtractor
|
|
from backend.extractors.models import ExtractedStream
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Desktop Safari identity sent as the User-Agent header on every embed
# page request made by this module.
USER_AGENT = "".join(
    [
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ",
        "AppleWebKit/605.1.15 (KHTML, like Gecko) ",
        "Version/17.4 Safari/605.1.15",
    ]
)
|
|
|
|
# Curated hmembeds embed IDs that the community treats as 24/7 channels.
|
|
# `_CHANNELS` mirrors the legacy `CuratedExtractor` list — keeping them
|
|
# here means the resolver can attempt offline-decoded JWT URLs and the
|
|
# verifier filters out the ones that are upstream-offline.
|
|
_CHANNELS = (
|
|
("888520f36cd94c5da4c71fddc1a5fc9b", "Sky Sports F1 (24/7) — hmembeds"),
|
|
("fc3a54634d0867b0c02ee3223292e7c6", "DAZN F1 (24/7) — hmembeds"),
|
|
)
|
|
|
|
_KEY_RE = re.compile(r'k\s*=\s*"([a-z0-9]+)"')
|
|
_BLOB_RE = re.compile(r'b\s*=\s*atob\("([^"]+)"\)')
|
|
_URL_RE = re.compile(r'streamUrl\s*=\s*"([^"]+)"')
|
|
|
|
|
|
def decode_embed(html: str) -> str | None:
|
|
"""Pull the m3u8 URL out of an hmembeds embed HTML.
|
|
|
|
Returns the JWT-bound m3u8 URL the page would tell JW Player to
|
|
play, or None if the page doesn't match the expected shape.
|
|
"""
|
|
km = _KEY_RE.search(html)
|
|
bm = _BLOB_RE.search(html)
|
|
if not km or not bm:
|
|
return None
|
|
key = km.group(1)
|
|
blob = bm.group(1)
|
|
try:
|
|
# b = atob(blob) — base64-decode bytes
|
|
# c = decodeURIComponent(escape(b)) — Latin-1 → UTF-8 round-trip
|
|
# d[i] = c[i] ^ k[i % len(k)] — XOR with rotating key
|
|
raw = base64.b64decode(blob).decode("latin-1")
|
|
deuri = urllib.parse.unquote(raw)
|
|
decoded = "".join(
|
|
chr(ord(c) ^ ord(key[i % len(key)])) for i, c in enumerate(deuri)
|
|
)
|
|
except Exception:
|
|
return None
|
|
m = _URL_RE.search(decoded)
|
|
return m.group(1) if m else None
|
|
|
|
|
|
class HmembedsExtractor(BaseExtractor):
    """Resolve the curated hmembeds.one channels to m3u8 stream URLs.

    Every embed page listed in `_CHANNELS` is fetched over HTTP and run
    through `decode_embed`; channels whose page cannot be fetched or
    decoded are skipped.
    """

    @property
    def site_key(self) -> str:
        """Identifier of this source, also stamped on each emitted stream."""
        return "hmembeds"

    @property
    def site_name(self) -> str:
        """Display name of this source, also stamped on each emitted stream."""
        return "hmembeds.one"

    async def extract(self) -> list[ExtractedStream]:
        """Fetch each curated embed page and collect the decoded streams.

        Returns:
            One `ExtractedStream` per channel whose embed page answered
            with HTTP 200 and decoded to a URL; possibly an empty list.
        """
        client_options = {
            "timeout": 15.0,
            "follow_redirects": True,
            "headers": {"User-Agent": USER_AGENT, "Referer": "https://hmembeds.one/"},
        }
        streams: list[ExtractedStream] = []

        async with httpx.AsyncClient(**client_options) as http:
            for channel_id, channel_title in _CHANNELS:
                try:
                    response = await http.get(f"https://hmembeds.one/embed/{channel_id}")
                except Exception:
                    # Best effort: one unreachable embed must not stop the
                    # remaining channels from being tried.
                    logger.debug("[hmembeds] embed %s fetch failed", channel_id, exc_info=True)
                    continue

                # Only a 200 response is worth decoding.
                playlist_url = (
                    decode_embed(response.text) if response.status_code == 200 else None
                )
                if playlist_url:
                    streams.append(
                        ExtractedStream(
                            url=playlist_url,
                            site_key=self.site_key,
                            site_name=self.site_name,
                            quality="",
                            title=channel_title,
                            stream_type="m3u8",
                        )
                    )

        logger.info("[hmembeds] resolved %d JWT URL(s) (verifier filters dead origins)", len(streams))
        return streams
|