f1-stream: drop demo + landing-page extractors, add fetch-proxy injection
Per user feedback: the demo Big Buck Bunny / Apple test streams aren't useful in an F1-streams app. Removed DemoExtractor entirely. Tightened the discord-extractor path filter from "any stream-shaped path" to "direct embed/player path only" — the previous filter still let sportsurge `/event/...` landing pages through, which the verifier mistook for playable because they render player-class divs without a real player. Embed proxy now also rewrites window.fetch + XMLHttpRequest.open inside the upstream HTML so that cross-origin XHRs (e.g. the hmembeds `/sec/<JWT>` token-binding endpoint) go through our /embed-asset relay. This avoids the CORS reject that fired when the player JS tried to call hghndasw.gbgdhdffhf.shop/sec/... from an `f1.viktorbarzin.me` origin. The verifier now requires a `<video>` element to mark embed streams playable (not just a player-class div). Curated streams bypass the verifier — hmembeds aggressively detects headless Chromium (devtool trap, console-clear timing, automation flags) and won't progress past JW Player init in our pod, but the user's real browser should clear those checks. We can't honestly headless-verify hmembeds, so we trust the curator instead of falsely rejecting them. Image: viktorbarzin/f1-stream:v6.1.1
This commit is contained in:
parent
f90d79ed4e
commit
574cdf08d2
5 changed files with 87 additions and 27 deletions
|
|
@ -96,6 +96,63 @@ _FRAME_BUSTER_DEFEAT_TEMPLATE = """
|
|||
loc.assign = function(u){{ if (typeof u === 'string' && u.indexOf('google.com') !== -1) return; if (origAssign) origAssign(u); }};
|
||||
loc.replace = function(u){{ if (typeof u === 'string' && u.indexOf('google.com') !== -1) return; if (origReplace) origReplace(u); }};
|
||||
}} catch (e) {{}}
|
||||
|
||||
// Route all cross-origin fetch/XHR requests through our /embed-asset
|
||||
// proxy. The hmembeds player calls a token-binding endpoint
|
||||
// (hghndasw.gbgdhdffhf.shop/sec/<JWT>) that CORS-rejects requests from
|
||||
// any origin other than hmembeds.one. By rewriting the URL to
|
||||
// /embed-asset?url=..., the browser fetches our same-origin endpoint
|
||||
// (no CORS issue), and our backend fetches the upstream with the
|
||||
// correct Referer/Origin server-side (no CORS issue there either).
|
||||
try {{
|
||||
var b64url = function(s) {{
|
||||
return btoa(unescape(encodeURIComponent(s)))
|
||||
.replace(/\\+/g, '-').replace(/\\//g, '_').replace(/=+$/, '');
|
||||
}};
|
||||
var sameOrigin = function(u) {{
|
||||
try {{ return (new URL(u, document.baseURI || location.href)).origin === location.origin; }}
|
||||
catch (_) {{ return true; }}
|
||||
}};
|
||||
var toAbsolute = function(u) {{
|
||||
try {{ return (new URL(u, document.baseURI || location.href)).toString(); }}
|
||||
catch (_) {{ return u; }}
|
||||
}};
|
||||
var proxify = function(u) {{
|
||||
var abs = toAbsolute(u);
|
||||
if (sameOrigin(abs)) return u;
|
||||
// Don't double-proxy.
|
||||
if (abs.indexOf('/embed-asset?') !== -1 || abs.indexOf('/embed?') !== -1) return u;
|
||||
return location.origin + '/embed-asset?url=' + b64url(abs);
|
||||
}};
|
||||
|
||||
var _fetch = window.fetch && window.fetch.bind(window);
|
||||
if (_fetch) {{
|
||||
window.fetch = function(input, init) {{
|
||||
try {{
|
||||
if (typeof input === 'string') {{
|
||||
return _fetch(proxify(input), init);
|
||||
}} else if (input && input.url) {{
|
||||
var newUrl = proxify(input.url);
|
||||
if (newUrl !== input.url) {{
|
||||
return _fetch(new Request(newUrl, input), init);
|
||||
}}
|
||||
}}
|
||||
}} catch (e) {{}}
|
||||
return _fetch(input, init);
|
||||
}};
|
||||
}}
|
||||
|
||||
var XHR = window.XMLHttpRequest;
|
||||
if (XHR && XHR.prototype && XHR.prototype.open) {{
|
||||
var _open = XHR.prototype.open;
|
||||
XHR.prototype.open = function(method, url) {{
|
||||
try {{ url = proxify(url); }} catch (e) {{}}
|
||||
var args = Array.prototype.slice.call(arguments);
|
||||
args[1] = url;
|
||||
return _open.apply(this, args);
|
||||
}};
|
||||
}}
|
||||
}} catch (e) {{}}
|
||||
}})();</script>
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ Example:
|
|||
from backend.extractors.aceztrims import AceztrimsExtractor
|
||||
from backend.extractors.curated import CuratedExtractor
|
||||
from backend.extractors.daddylive import DaddyLiveExtractor
|
||||
from backend.extractors.demo import DemoExtractor
|
||||
from backend.extractors.discord_source import DiscordExtractor
|
||||
from backend.extractors.models import ExtractedStream
|
||||
from backend.extractors.pitsport import PitsportExtractor
|
||||
|
|
@ -42,12 +41,11 @@ def create_registry() -> ExtractorRegistry:
|
|||
|
||||
# --- Register extractors below ---
|
||||
# CuratedExtractor returns hand-picked 24/7 channels first so we always
|
||||
# have something. FallbackExtractor was removed — it surfaced aggregator
|
||||
# landing pages that don't play directly in an iframe (they require
|
||||
# user navigation through the page) and dominated the list with
|
||||
# entries that fail browser-based playback verification.
|
||||
# have something. DemoExtractor and FallbackExtractor were removed —
|
||||
# demo streams aren't F1 content (just Big Buck Bunny etc.) and
|
||||
# FallbackExtractor surfaced aggregator landing pages that don't play
|
||||
# directly in an iframe.
|
||||
registry.register(CuratedExtractor())
|
||||
registry.register(DemoExtractor())
|
||||
registry.register(StreamedExtractor())
|
||||
registry.register(DaddyLiveExtractor())
|
||||
registry.register(AceztrimsExtractor())
|
||||
|
|
|
|||
|
|
@ -42,13 +42,13 @@ EXCLUDED_DOMAINS = {
|
|||
}
|
||||
|
||||
# A URL is treated as a candidate stream embed only if its path looks like
|
||||
# a stream/embed/player route. This catches /embed/{id}, /stream/{id},
|
||||
# /watch/{id}, /live/{slug}, /player/{...} and similar — and rejects
|
||||
# /article/, /news/, /latest/, /join/, etc.
|
||||
# a *direct* player/embed page — `/embed/{id}`, `/player/{...}`, `*.m3u8`,
|
||||
# `*.php` (legacy iframe1.php style). Aggregator landing pages
|
||||
# (`/event/...`, `/watch?session=...`, etc.) are rejected because they
|
||||
# show a list of links instead of playing automatically — those produce
|
||||
# verifier-passing UI without actual playback.
|
||||
_PATH_KEYWORDS = (
|
||||
"embed/", "/stream", "/streams", "/watch", "/live",
|
||||
"/player", "/play/", "/sky", "/f1/", "/formula",
|
||||
"/grand-prix", "/gp/", "/channel", ".m3u8", ".php",
|
||||
"/embed/", "/player/", ".m3u8", ".php",
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -94,10 +94,21 @@ class ExtractionService:
|
|||
stream.is_live = verdict.is_playable
|
||||
stream.checked_at = now_iso
|
||||
|
||||
# Curated streams skip the verifier — they are hand-picked
|
||||
# 24/7 channels whose embed pages aggressively detect headless
|
||||
# automation. We can't reliably confirm playback server-side,
|
||||
# but we trust the curator. The user's real browser does NOT
|
||||
# trigger the same anti-bot heuristics (real plugins, real
|
||||
# mouse movements, etc.).
|
||||
CURATED_BYPASS = {"curated"}
|
||||
for stream in embed_streams:
|
||||
stream.checked_at = now_iso
|
||||
if stream.site_key in CURATED_BYPASS:
|
||||
stream.is_live = True
|
||||
stream.response_time_ms = 0
|
||||
continue
|
||||
key = stream.embed_url or stream.url
|
||||
verdict = verdicts.get(key)
|
||||
stream.checked_at = now_iso
|
||||
if verdict is None:
|
||||
# Verifier unavailable — fall back to "trust extractor".
|
||||
# This keeps the service usable even without playwright.
|
||||
|
|
|
|||
|
|
@ -162,12 +162,9 @@ _EMBED_POLL_JS = """
|
|||
sources: v.querySelectorAll('source').length,
|
||||
};
|
||||
}
|
||||
const player_divs = document.querySelectorAll(
|
||||
'[id*="player" i], [class*="player" i], [class*="jwplayer" i], [id*="video" i], [class*="video-js" i]'
|
||||
);
|
||||
return {has_video: false, has_player_div: player_divs.length > 0};
|
||||
return {has_video: false};
|
||||
} catch (e) {
|
||||
return {has_video: false, has_player_div: false, err: String(e)};
|
||||
return {has_video: false, err: String(e)};
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
|
@ -271,12 +268,14 @@ async def _verify_embed(page, proxied_url: str, deadline: float) -> PlaybackVerd
|
|||
)
|
||||
|
||||
# Track the best state seen across all polls. Some embeds load a player
|
||||
# div briefly then anti-bot JS tears the DOM down (hmembeds redirects
|
||||
# to google.com if its devtool-detection trips). We accept any positive
|
||||
# briefly then anti-bot JS tears the DOM down (hmembeds redirects to
|
||||
# google.com if its devtool-detection trips). We accept any positive
|
||||
# signal observed during the window, even if it's gone by timeout.
|
||||
#
|
||||
# We require an actual <video> element — a "player container div"
|
||||
# is too weak (sportsurge has player-class divs but no real player).
|
||||
seen_video_wired = False
|
||||
seen_video_tag = False
|
||||
seen_player_div = False
|
||||
last_err = ""
|
||||
|
||||
while time.monotonic() < deadline:
|
||||
|
|
@ -295,8 +294,6 @@ async def _verify_embed(page, proxied_url: str, deadline: float) -> PlaybackVerd
|
|||
is_playable=True, signal="video.wired",
|
||||
elapsed_ms=int((time.monotonic() - start) * 1000),
|
||||
)
|
||||
if r.get("has_player_div"):
|
||||
seen_player_div = True
|
||||
last_err = r.get("err", "")
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
|
|
@ -306,11 +303,8 @@ async def _verify_embed(page, proxied_url: str, deadline: float) -> PlaybackVerd
|
|||
if seen_video_tag:
|
||||
return PlaybackVerdict(is_playable=True, signal="video.tag_only",
|
||||
elapsed_ms=int((time.monotonic() - start) * 1000))
|
||||
if seen_player_div:
|
||||
return PlaybackVerdict(is_playable=True, signal="player_div",
|
||||
elapsed_ms=int((time.monotonic() - start) * 1000))
|
||||
|
||||
err = "no <video> or player container found"
|
||||
err = "no <video> element rendered"
|
||||
if last_err:
|
||||
err += f"; last_err: {last_err}"
|
||||
return PlaybackVerdict(is_playable=False, error=err,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue