infra/modules/kubernetes/f1-stream/files/internal/scraper/validate.go
Viktor Barzin 450dfc28e4
[ci skip] Add reverse proxy mode to f1-stream
Replace CPU-intensive headless Chrome + WebRTC pipeline with a
lightweight Go reverse proxy that strips anti-framing headers
(X-Frame-Options, CSP) and embeds streaming sites in iframes.

- New internal/proxy package with URL rewriting for HTML/CSS
- JS shim injection to intercept fetch/XHR/WebSocket/createElement
- Referer reconstruction for correct cross-origin auth (HLS streams)
- Inline iframe viewer preserving site navigation (not fullscreen overlay)
2026-02-21 21:23:21 +00:00

142 lines
3.5 KiB
Go

package scraper
import (
"io"
"log"
"net/http"
"strings"
"time"
"f1-stream/internal/models"
)
// videoMarkers are substrings checked (case-insensitively) against the HTML
// body to detect the presence of a video player or streaming manifest.
var videoMarkers = []string{
// HTML5 video element
"<video",
// HLS manifests
".m3u8",
"application/x-mpegurl",
"application/vnd.apple.mpegurl",
// DASH manifests
".mpd",
"application/dash+xml",
// Player libraries
"hls.js",
"hls.min.js",
"dash.js",
"dash.all.min.js",
"video.js",
"video.min.js",
"videojs",
"jwplayer",
"clappr",
"flowplayer",
"plyr",
"shaka-player",
"mediaelement",
"fluidplayer",
}
// videoContentTypes are Content-Type prefixes/substrings that indicate a
// direct video response (no HTML inspection needed).
var videoContentTypes = []string{
"video/",
"application/x-mpegurl",
"application/vnd.apple.mpegurl",
"application/dash+xml",
}
// validateBodyLimit caps how much HTML we read when looking for markers.
const validateBodyLimit = 2 * 1024 * 1024 // 2 MB
// validateLinks fetches each link and keeps only those whose response
// contains video/player content markers.
func validateLinks(links []models.ScrapedLink, timeout time.Duration) []models.ScrapedLink {
client := &http.Client{
Timeout: timeout,
CheckRedirect: func(req *http.Request, via []*http.Request) error {
if len(via) >= 3 {
return http.ErrUseLastResponse
}
return nil
},
}
var kept []models.ScrapedLink
for _, link := range links {
if HasVideoContent(client, link.URL) {
kept = append(kept, link)
} else {
log.Printf("scraper: discarded %s (no video markers)", truncate(link.URL, 60))
}
}
return kept
}
// HasVideoContent performs a GET request for rawURL and returns true if the
// response is a direct video file (by Content-Type) or an HTML page that
// contains at least one video marker substring.
func HasVideoContent(client *http.Client, rawURL string) bool {
req, err := http.NewRequest("GET", rawURL, nil)
if err != nil {
log.Printf("scraper: validate request error for %s: %v", truncate(rawURL, 60), err)
return false
}
req.Header.Set("User-Agent", userAgent)
resp, err := client.Do(req)
if err != nil {
log.Printf("scraper: validate fetch error for %s: %v", truncate(rawURL, 60), err)
return false
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 400 {
return false
}
ct := strings.ToLower(resp.Header.Get("Content-Type"))
// Direct video content type — no need to inspect body.
if isDirectVideoContentType(ct) {
return true
}
// Only inspect HTML pages for markers.
if !strings.Contains(ct, "text/html") && !strings.Contains(ct, "application/xhtml") {
return false
}
body, err := io.ReadAll(io.LimitReader(resp.Body, validateBodyLimit))
if err != nil {
log.Printf("scraper: validate read error for %s: %v", truncate(rawURL, 60), err)
return false
}
return containsVideoMarkers(strings.ToLower(string(body)))
}
// containsVideoMarkers returns true if loweredBody contains any known video
// player or streaming marker substring.
func containsVideoMarkers(loweredBody string) bool {
for _, marker := range videoMarkers {
if strings.Contains(loweredBody, marker) {
return true
}
}
return false
}
// isDirectVideoContentType returns true if ct (already lowercased) matches a
// known video content type.
func isDirectVideoContentType(ct string) bool {
ct = strings.ToLower(ct)
for _, vct := range videoContentTypes {
if strings.Contains(ct, vct) {
return true
}
}
return false
}