f1-stream: revive aceztrims + pitsport, more ppv variants
- aceztrims: scrape /f11/ (the actual stream page), not /f1/ (the
cross-sport schedule). Drop the dead /iframe1?s= + onclick m3u8
regexes (site moved to `getElementById('iframe').src = '...'` ~20
channels ago). Strip HTML comments first so the ~20 legacy buttons
kept inside <!-- ... --> stop showing up as false positives.
Also pick up the default inline <iframe id='iframe' src='...'>.
Local run: 11 channels (was 0).
- pitsport: decode the RSC payload before regex-matching in
_parse_live_events (the raw HTML keeps it escape-encoded, so the homepage
card path silently matched 0 events). Add the new /live-now route (canonical
what's-live-right-now list). Add "f1" to MOTORSPORT_CATEGORIES — the
site labels Formula 1 events as just "F1". Refresh the stale
serveplay.site docstring (host rotates; pushembdz's api/stream link
is authoritative).
Local run: 7 m3u8 streams covering Canadian GP (EN1/EN2/MULTI/ITA/ESP)
+ NASCAR Coke 600 (was 0).
- ppv: always emit the parent embed alongside substreams (was dropping
it whenever substreams existed). Prefer source_tag in substream titles
so users see "Sky Sport 1 NZ" / "Apple TV (F1TV)" instead of generic
#1/#2 suffixes.
Diagnosed against the live cluster (curated + 7 other extractors
returning 0 cached streams, only 2 dead hmembeds curated 24/7 channels
visible to users). Each fix verified with the extractor run against
live sites this turn.
This commit is contained in:
parent
d5f73ce109
commit
5a0e4b3dac
3 changed files with 147 additions and 125 deletions
|
|
@ -34,7 +34,7 @@ USER_AGENT = (
|
|||
# to also surface MotoGP and adjacent motorsports — keeps the f1-stream
|
||||
# UI useful between race weekends and during the off-season.
|
||||
MOTORSPORT_CATEGORIES = {
|
||||
"formula 1", "formula 2", "formula 3",
|
||||
"f1", "formula 1", "formula 2", "formula 3",
|
||||
"motogp", "moto gp", "moto2", "moto3", "motoe",
|
||||
"world rally championship", "wrc",
|
||||
"world endurance championship", "wec",
|
||||
|
|
@ -85,27 +85,61 @@ _is_f1_category = _is_motorsport_category
|
|||
_is_f1_event = _is_motorsport_event
|
||||
|
||||
|
||||
def _parse_live_events(html: str) -> list[_PitsportEvent]:
|
||||
"""Parse live events from the main page RSC payload.
|
||||
def _decode_rsc_payload(html: str) -> str:
|
||||
"""Concatenate and unescape all `self.__next_f.push([1, "..."])` chunks.
|
||||
|
||||
The main page contains event cards with props:
|
||||
category, title, time, imageUrl
|
||||
wrapped in <a href="/watch/{UUID}"> links.
|
||||
Next.js RSC ships its tree as escape-encoded strings inside repeated
|
||||
`self.__next_f.push` calls. Regex over the raw HTML misses everything
|
||||
interesting; we have to decode unicode escapes first.
|
||||
"""
|
||||
chunks = re.findall(r'self\.__next_f\.push\(\[1,"(.*?)"\]\)', html, re.DOTALL)
|
||||
if not chunks:
|
||||
return ""
|
||||
payload = ""
|
||||
for chunk in chunks:
|
||||
try:
|
||||
payload += chunk.encode().decode("unicode_escape")
|
||||
except Exception:
|
||||
payload += chunk
|
||||
return payload
|
||||
|
||||
|
||||
def _parse_live_events(html: str) -> list[_PitsportEvent]:
    """Collect event cards from a page's Next.js RSC payload.

    The payload is decoded with `_decode_rsc_payload` first; when that
    yields an empty string the raw HTML is scanned instead. Three card
    layouts are matched, and results are returned grouped by layout in
    the order listed here:

    1) Link-card props, e.g.
       ["$","$L2","/watch/UUID",{"href":"/watch/UUID","children":[...,
        {"category":"...","title":"...","time":...,"imageUrl":"..."}]}]
    2) An `event` object ordered title -> uri -> category.
    3) An `event` object ordered category -> title -> uri.

    Args:
        html: Raw page HTML (main page or `/live-now`).

    Returns:
        One `_PitsportEvent` per regex match; no de-duplication is done.
    """
    text = _decode_rsc_payload(html) or html

    # Each entry pairs a regex with the meaning of its capture groups, in
    # the order the groups appear in that regex.
    card_shapes = (
        (
            r'"href":"(/watch/([0-9a-f-]{36}))"[^}]*?"category":"([^"]+)","title":"([^"]+)"',
            ("path", "watch_uuid", "category", "title"),
        ),
        (
            r'"event":\{[^{}]*?"title":"([^"]+)"[^{}]*?"uri":"/watch/([0-9a-f-]{36})"[^{}]*?"category":"([^"]+)"',
            ("title", "watch_uuid", "category"),
        ),
        (
            r'"event":\{[^{}]*?"category":"([^"]+)"[^{}]*?"title":"([^"]+)"[^{}]*?"uri":"/watch/([0-9a-f-]{36})"',
            ("category", "title", "watch_uuid"),
        ),
    )

    found: list[_PitsportEvent] = []
    for regex, group_names in card_shapes:
        for hit in re.finditer(regex, text):
            fields = dict(zip(group_names, hit.groups()))
            found.append(
                _PitsportEvent(
                    category=fields["category"],
                    title=fields["title"],
                    watch_uuid=fields["watch_uuid"],
                )
            )
    return found
|
||||
|
||||
|
||||
|
|
@ -301,13 +335,12 @@ def _is_m3u8_method(method: str) -> bool:
|
|||
|
||||
|
||||
def _extract_m3u8_url(link: str) -> str:
|
||||
"""Convert a serveplay.site player URL to an m3u8 playlist URL.
|
||||
"""Pass through the link from pushembdz's `api/stream/<slug>` response.
|
||||
|
||||
Input: https://dash.serveplay.site/{channel}/index.html
|
||||
Output: https://dash.serveplay.site/{channel}/index.html
|
||||
|
||||
The index.html IS the m3u8 playlist (served with proper content-type
|
||||
when fetched with the correct Referer header).
|
||||
The host has rotated over time (serveplay.site → oe1.ossfeed.store →
|
||||
…); the response is always a master playlist URL we hand to the
|
||||
player as-is. Content-Type may be `text/css` or `application/json` —
|
||||
treat as HLS based on body sniffing (`#EXTM3U`), not MIME.
|
||||
"""
|
||||
return link
|
||||
|
||||
|
|
@ -388,6 +421,24 @@ class PitsportExtractor(BaseExtractor):
|
|||
except Exception:
|
||||
logger.exception("[pitsport] Failed to fetch main page")
|
||||
|
||||
# Fetch /live-now — canonical "currently live" list, added 2026.
|
||||
try:
|
||||
resp = await client.get(f"{PITSPORT_BASE}/live-now")
|
||||
if resp.status_code == 200:
|
||||
live_now_events = _parse_live_events(resp.text)
|
||||
logger.info(
|
||||
"[pitsport] Live-now page: %d event(s)", len(live_now_events)
|
||||
)
|
||||
for ev in live_now_events:
|
||||
if _is_f1_event(ev.category, ev.title):
|
||||
all_events.append(ev)
|
||||
else:
|
||||
logger.warning(
|
||||
"[pitsport] Live-now page returned HTTP %d", resp.status_code
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("[pitsport] Failed to fetch live-now page")
|
||||
|
||||
# Fetch schedule page for upcoming events
|
||||
try:
|
||||
resp = await client.get(f"{PITSPORT_BASE}/schedule")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue