Replace the CPU-intensive headless Chrome + WebRTC pipeline with a lightweight Go reverse proxy that strips anti-framing headers (X-Frame-Options, CSP) and embeds streaming sites in iframes.

- New internal/proxy package with URL rewriting for HTML/CSS
- JS shim injection to intercept fetch/XHR/WebSocket/createElement
- Referer reconstruction for correct cross-origin auth (HLS streams)
- Inline iframe viewer preserving site navigation (not a fullscreen overlay)
383 lines
11 KiB
Go
383 lines
11 KiB
Go
package extractor
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"net"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/chromedp/cdproto/fetch"
|
|
"github.com/chromedp/cdproto/input"
|
|
"github.com/chromedp/cdproto/network"
|
|
"github.com/chromedp/cdproto/page"
|
|
"github.com/chromedp/chromedp"
|
|
"github.com/gobwas/ws"
|
|
"github.com/gobwas/ws/wsutil"
|
|
"github.com/pion/webrtc/v4"
|
|
)
|
|
|
|
// Tunables shared by every remote browser session.
const (
	// sessionTimeout is how long a session may go without any client message
	// before it is cancelled.
	sessionTimeout = 5 * time.Minute

	// defaultViewportW and defaultViewportH are the initial capture and
	// Chrome window dimensions, in pixels.
	defaultViewportW = 1280
	defaultViewportH = 720

	// turnCredentialTTL is the lifetime requested for generated TURN
	// credentials.
	turnCredentialTTL = 24 * time.Hour
)
|
|
|
|
// TURN settings shared by all sessions. They are written by SetTURNConfig.
var (
	// turnURL is the public TURN URL handed to browser clients.
	turnURL string
	// turnSharedSecret is the secret used to derive TURN credentials.
	turnSharedSecret string
	// turnInternalURL is the TURN URL used by the server-side peer.
	turnInternalURL string
)

// SetTURNConfig records the TURN server URL, the shared secret, and an
// optional internal URL.
//
// The public URL is sent to the browser client. The internal URL is used by
// pion (server-side) to avoid hairpin NAT issues; when it is empty, an
// in-cluster coturn address is used instead.
func SetTURNConfig(url, secret, internalURL string) {
	// Resolve the fallback first so all three settings are stored together.
	if internalURL == "" {
		internalURL = "turn:coturn.coturn.svc.cluster.local:3478"
	}
	turnURL, turnSharedSecret, turnInternalURL = url, secret, internalURL

	log.Printf("extractor: TURN configured: public=%s internal=%s", url, turnInternalURL)
}
|
|
|
|
// adDomains lists the advertising and tracking domains whose subdomain
// requests are intercepted and failed during a browser session.
var adDomains = []string{
	"doubleclick.net",
	"googlesyndication.com",
	"googleadservices.com",
	"google-analytics.com",
	"adnxs.com",
	"criteo.com",
	"outbrain.com",
	"taboola.com",
	"amazon-adsystem.com",
	"popads.net",
	"popcash.net",
	"juicyads.com",
	"exoclick.com",
	"trafficjunky.com",
	"propellerads.com",
	"adsterra.com",
	"hilltopads.net",
	"revcontent.com",
	"mgid.com",
}
|
|
|
|
type inputMsg struct {
|
|
Type string `json:"type"`
|
|
X float64 `json:"x"`
|
|
Y float64 `json:"y"`
|
|
Button int `json:"button"`
|
|
DeltaX float64 `json:"deltaX"`
|
|
DeltaY float64 `json:"deltaY"`
|
|
Key string `json:"key"`
|
|
Code string `json:"code"`
|
|
Mods int `json:"modifiers"`
|
|
Width int `json:"width"`
|
|
Height int `json:"height"`
|
|
SDP string `json:"sdp"`
|
|
Candidate *webrtc.ICECandidateInit `json:"candidate"`
|
|
}
|
|
|
|
// HandleBrowserSession upgrades to WebSocket and runs a remote browser session
|
|
// with WebRTC video/audio streaming and CDP input relay.
|
|
func HandleBrowserSession(w http.ResponseWriter, r *http.Request, pageURL string) {
|
|
// Check session capacity
|
|
select {
|
|
case sessionSem <- struct{}{}:
|
|
defer func() { <-sessionSem }()
|
|
default:
|
|
http.Error(w, `{"error":"too many active browser sessions"}`, http.StatusServiceUnavailable)
|
|
return
|
|
}
|
|
|
|
conn, _, _, err := ws.UpgradeHTTP(r, w)
|
|
if err != nil {
|
|
log.Printf("extractor: session: ws upgrade failed: %v", err)
|
|
return
|
|
}
|
|
defer conn.Close()
|
|
|
|
ctx, cancel := context.WithCancel(r.Context())
|
|
defer cancel()
|
|
|
|
// Allocate display and start capture pipeline
|
|
display := nextDisplay()
|
|
viewW, viewH := defaultViewportW, defaultViewportH
|
|
|
|
cap, err := NewCapture(display, viewW, viewH)
|
|
if err != nil {
|
|
sendWSError(conn, "failed to start capture: "+err.Error())
|
|
log.Printf("extractor: session: capture error: %v", err)
|
|
return
|
|
}
|
|
defer cap.Close()
|
|
|
|
// Start Chrome on the virtual display
|
|
opts := append(chromedp.DefaultExecAllocatorOptions[:],
|
|
chromedp.Flag("headless", false),
|
|
chromedp.Flag("no-sandbox", true),
|
|
chromedp.Flag("disable-gpu", true),
|
|
chromedp.Flag("disable-software-rasterizer", true),
|
|
chromedp.Flag("disable-dev-shm-usage", true),
|
|
chromedp.Flag("disable-extensions", true),
|
|
chromedp.Flag("disable-background-networking", true),
|
|
chromedp.ModifyCmdFunc(func(cmd *exec.Cmd) {
|
|
cmd.Env = append(os.Environ(), fmt.Sprintf("DISPLAY=:%d", display))
|
|
}),
|
|
chromedp.Flag("autoplay-policy", "no-user-gesture-required"),
|
|
chromedp.Flag("window-size", fmt.Sprintf("%d,%d", viewW, viewH)),
|
|
chromedp.WSURLReadTimeout(30 * time.Second),
|
|
)
|
|
allocCtx, allocCancel := chromedp.NewExecAllocator(ctx, opts...)
|
|
defer allocCancel()
|
|
|
|
tabCtx, tabCancel := chromedp.NewContext(allocCtx)
|
|
defer tabCancel()
|
|
|
|
var wsMu sync.Mutex
|
|
|
|
// Build ICE servers for pion (server-side) — uses internal TURN URL to avoid hairpin NAT
|
|
iceServers := []webrtc.ICEServer{
|
|
{URLs: []string{"stun:stun.l.google.com:19302"}},
|
|
}
|
|
var turnCreds *TURNCredentials
|
|
if turnURL != "" && turnSharedSecret != "" {
|
|
// Server-side: use internal k8s DNS for TURN to bypass NAT
|
|
internalCreds := GenerateTURNCredentials(turnInternalURL, turnSharedSecret, turnCredentialTTL)
|
|
turnCreds = &internalCreds
|
|
iceServers = append(iceServers, webrtc.ICEServer{
|
|
URLs: internalCreds.URLs,
|
|
Username: internalCreds.Username,
|
|
Credential: internalCreds.Credential,
|
|
CredentialType: webrtc.ICECredentialTypePassword,
|
|
})
|
|
}
|
|
|
|
// Build ad-blocking fetch patterns
|
|
adPatterns := make([]*fetch.RequestPattern, 0, len(adDomains))
|
|
for _, domain := range adDomains {
|
|
adPatterns = append(adPatterns, &fetch.RequestPattern{
|
|
URLPattern: fmt.Sprintf("*://*.%s/*", domain),
|
|
})
|
|
}
|
|
|
|
// Set up event listeners before navigation
|
|
chromedp.ListenTarget(tabCtx, func(ev interface{}) {
|
|
switch e := ev.(type) {
|
|
case *fetch.EventRequestPaused:
|
|
go chromedp.Run(tabCtx, fetch.FailRequest(e.RequestID, network.ErrorReasonBlockedByClient))
|
|
case *page.EventFrameNavigated:
|
|
if e.Frame.ParentID == "" {
|
|
go sendURLUpdate(tabCtx, conn, &wsMu, e.Frame.URL)
|
|
}
|
|
case *page.EventNavigatedWithinDocument:
|
|
go sendURLUpdate(tabCtx, conn, &wsMu, e.URL)
|
|
}
|
|
})
|
|
|
|
// Enable fetch interception (ad blocking) and navigate
|
|
if err := chromedp.Run(tabCtx,
|
|
fetch.Enable().WithPatterns(adPatterns),
|
|
chromedp.Navigate(pageURL),
|
|
chromedp.WaitReady("body"),
|
|
); err != nil {
|
|
sendWSError(conn, "navigation failed")
|
|
log.Printf("extractor: session: navigate error for %s: %v", pageURL, err)
|
|
return
|
|
}
|
|
|
|
// Create WebRTC media stream
|
|
mediaStream, err := NewMediaStream(iceServers, func(c *webrtc.ICECandidate) {
|
|
data, _ := json.Marshal(map[string]interface{}{
|
|
"type": "ice",
|
|
"candidate": c.ToJSON(),
|
|
})
|
|
wsMu.Lock()
|
|
wsutil.WriteServerMessage(conn, ws.OpText, data)
|
|
wsMu.Unlock()
|
|
}, cancel)
|
|
if err != nil {
|
|
sendWSError(conn, "WebRTC setup failed")
|
|
log.Printf("extractor: session: webrtc error: %v", err)
|
|
return
|
|
}
|
|
defer mediaStream.Close()
|
|
|
|
// Create and send SDP offer
|
|
sdp, err := mediaStream.Offer()
|
|
if err != nil {
|
|
sendWSError(conn, "WebRTC offer failed")
|
|
log.Printf("extractor: session: offer error: %v", err)
|
|
return
|
|
}
|
|
|
|
// Send ICE config to client — uses PUBLIC TURN URL (for browser to reach from internet)
|
|
clientICE := []map[string]interface{}{
|
|
{"urls": []string{"stun:stun.l.google.com:19302"}},
|
|
}
|
|
if turnCreds != nil {
|
|
// Client-side: use public IP for TURN (browser connects from internet)
|
|
publicCreds := GenerateTURNCredentials(turnURL, turnSharedSecret, turnCredentialTTL)
|
|
clientICE = append(clientICE, map[string]interface{}{
|
|
"urls": publicCreds.URLs,
|
|
"username": publicCreds.Username,
|
|
"credential": publicCreds.Credential,
|
|
})
|
|
}
|
|
iceMsg, _ := json.Marshal(map[string]interface{}{
|
|
"type": "iceServers",
|
|
"iceServers": clientICE,
|
|
})
|
|
wsMu.Lock()
|
|
wsutil.WriteServerMessage(conn, ws.OpText, iceMsg)
|
|
wsMu.Unlock()
|
|
|
|
offerMsg, _ := json.Marshal(map[string]interface{}{
|
|
"type": "offer",
|
|
"sdp": sdp,
|
|
})
|
|
wsMu.Lock()
|
|
wsutil.WriteServerMessage(conn, ws.OpText, offerMsg)
|
|
wsMu.Unlock()
|
|
|
|
// Send ready message with viewport dimensions
|
|
readyMsg, _ := json.Marshal(map[string]interface{}{
|
|
"type": "ready",
|
|
"width": viewW,
|
|
"height": viewH,
|
|
})
|
|
wsMu.Lock()
|
|
wsutil.WriteServerMessage(conn, ws.OpText, readyMsg)
|
|
wsMu.Unlock()
|
|
|
|
// Start streaming video and audio from capture pipes
|
|
go mediaStream.StreamVideo(cap.videoR, ctx)
|
|
go mediaStream.StreamAudio(cap.audioR, ctx)
|
|
|
|
log.Printf("extractor: session: started for %s (display :%d)", pageURL, display)
|
|
|
|
// Inactivity timer — cancels session after no client input
|
|
inactivity := time.NewTimer(sessionTimeout)
|
|
defer inactivity.Stop()
|
|
go func() {
|
|
select {
|
|
case <-inactivity.C:
|
|
log.Printf("extractor: session: inactivity timeout for %s", pageURL)
|
|
cancel()
|
|
case <-ctx.Done():
|
|
}
|
|
}()
|
|
|
|
// Read loop — process signaling and input messages
|
|
for {
|
|
msgs, err := wsutil.ReadClientMessage(conn, nil)
|
|
if err != nil {
|
|
break
|
|
}
|
|
for _, m := range msgs {
|
|
if m.OpCode != ws.OpText {
|
|
continue
|
|
}
|
|
|
|
// Reset inactivity timer
|
|
if !inactivity.Stop() {
|
|
select {
|
|
case <-inactivity.C:
|
|
default:
|
|
}
|
|
}
|
|
inactivity.Reset(sessionTimeout)
|
|
|
|
var msg inputMsg
|
|
if err := json.Unmarshal(m.Payload, &msg); err != nil {
|
|
continue
|
|
}
|
|
|
|
switch msg.Type {
|
|
case "answer":
|
|
if err := mediaStream.SetAnswer(msg.SDP); err != nil {
|
|
log.Printf("extractor: session: set answer error: %v", err)
|
|
}
|
|
case "ice":
|
|
if msg.Candidate != nil {
|
|
if err := mediaStream.AddICECandidate(*msg.Candidate); err != nil {
|
|
log.Printf("extractor: session: add ICE error: %v", err)
|
|
}
|
|
}
|
|
case "back":
|
|
chromedp.Run(tabCtx, chromedp.NavigateBack())
|
|
case "forward":
|
|
chromedp.Run(tabCtx, chromedp.NavigateForward())
|
|
default:
|
|
handleInput(tabCtx, &msg)
|
|
}
|
|
}
|
|
}
|
|
|
|
log.Printf("extractor: session: ended for %s", pageURL)
|
|
}
|
|
|
|
func handleInput(ctx context.Context, msg *inputMsg) {
|
|
switch msg.Type {
|
|
case "mousemove":
|
|
chromedp.Run(ctx,
|
|
input.DispatchMouseEvent(input.MouseMoved, msg.X, msg.Y))
|
|
case "mousedown":
|
|
chromedp.Run(ctx,
|
|
input.DispatchMouseEvent(input.MousePressed, msg.X, msg.Y).
|
|
WithButton(mapButton(msg.Button)).WithClickCount(1))
|
|
case "mouseup":
|
|
chromedp.Run(ctx,
|
|
input.DispatchMouseEvent(input.MouseReleased, msg.X, msg.Y).
|
|
WithButton(mapButton(msg.Button)))
|
|
case "scroll":
|
|
chromedp.Run(ctx,
|
|
input.DispatchMouseEvent(input.MouseWheel, msg.X, msg.Y).
|
|
WithDeltaX(msg.DeltaX).WithDeltaY(msg.DeltaY))
|
|
case "keydown":
|
|
chromedp.Run(ctx,
|
|
input.DispatchKeyEvent(input.KeyDown).
|
|
WithKey(msg.Key).WithCode(msg.Code).
|
|
WithModifiers(input.Modifier(msg.Mods)))
|
|
case "keyup":
|
|
chromedp.Run(ctx,
|
|
input.DispatchKeyEvent(input.KeyUp).
|
|
WithKey(msg.Key).WithCode(msg.Code).
|
|
WithModifiers(input.Modifier(msg.Mods)))
|
|
}
|
|
}
|
|
|
|
func mapButton(jsButton int) input.MouseButton {
|
|
switch jsButton {
|
|
case 1:
|
|
return input.Middle
|
|
case 2:
|
|
return input.Right
|
|
default:
|
|
return input.Left
|
|
}
|
|
}
|
|
|
|
func sendURLUpdate(tabCtx context.Context, conn net.Conn, mu *sync.Mutex, currentURL string) {
|
|
var canBack, canForward bool
|
|
var entries []*page.NavigationEntry
|
|
var currentIndex int64
|
|
|
|
if err := chromedp.Run(tabCtx, chromedp.ActionFunc(func(ctx context.Context) error {
|
|
var err error
|
|
currentIndex, entries, err = page.GetNavigationHistory().Do(ctx)
|
|
return err
|
|
})); err == nil {
|
|
canBack = currentIndex > 0
|
|
canForward = int(currentIndex) < len(entries)-1
|
|
}
|
|
|
|
data, _ := json.Marshal(map[string]interface{}{
|
|
"type": "url",
|
|
"url": currentURL,
|
|
"canBack": canBack,
|
|
"canForward": canForward,
|
|
})
|
|
mu.Lock()
|
|
wsutil.WriteServerMessage(conn, ws.OpText, data)
|
|
mu.Unlock()
|
|
}
|
|
|
|
func sendWSError(conn net.Conn, msg string) {
|
|
data, _ := json.Marshal(map[string]string{"type": "error", "message": msg})
|
|
wsutil.WriteServerMessage(conn, ws.OpText, data)
|
|
}
|