[ci skip] Move Terraform modules into stack directories

Move all 88 service modules (66 individual + 22 platform) from
modules/kubernetes/<service>/ into their corresponding stack directories:

- Service stacks: stacks/<service>/module/
- Platform stack: stacks/platform/modules/<service>/

This collocates module source code with its Terragrunt definition.
Only shared utility modules remain in modules/kubernetes/:
ingress_factory, setup_tls_secret, dockerhub_secret, oauth-proxy.

All cross-references to shared modules updated to use correct
relative paths. Verified with terragrunt run --all -- plan:
0 adds, 0 destroys across all 68 stacks.
This commit is contained in:
Viktor Barzin 2026-02-22 14:38:14 +00:00
parent 73cb696f12
commit e225e81ebf
No known key found for this signature in database
GPG key ID: 0EB088298288D958
614 changed files with 12075 additions and 352 deletions

View file

@ -0,0 +1,327 @@
package scraper
import (
"crypto/rand"
"encoding/json"
"fmt"
"io"
"log"
"math"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"f1-stream/internal/models"
)
// subredditURL is the JSON listing endpoint polled for new posts.
const subredditURL = "https://www.reddit.com/r/motorsportsstreams2/new.json?limit=25"

// userAgent is sent as the User-Agent header on every outgoing request.
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

// requestDelay is the pause inserted between consecutive Reddit requests.
const requestDelay = time.Second
// urlRe matches http(s) URLs, stopping at whitespace, closing brackets,
// '>' or a double quote.
var urlRe = regexp.MustCompile(`https?://[^\s\)\]\>"]+`)

// f1Keywords are substrings of a lower-cased post title that mark the post
// as F1 content.
var f1Keywords = []string{
	"f1", "formula 1", "formula one", "formula1",
	"grand prix",
	"gp qualifying", "gp race", "gp sprint", "gp practice",
}

// f1NegativeKeywords are substrings of a lower-cased post title that veto a
// post even when one of f1Keywords matches.
var f1NegativeKeywords = []string{"f1 key", "function 1", "help f1"}

// filteredDomains lists hostnames whose URLs are never treated as stream
// sources.
var filteredDomains = map[string]bool{
	"reddit.com": true, "www.reddit.com": true,
	"imgur.com": true, "i.imgur.com": true,
	"redd.it": true, "i.redd.it": true, "v.redd.it": true,
	"youtu.be": true, "youtube.com": true,
	"twitter.com": true, "x.com": true,
}
// redditListing is the decode target for the subreddit listing response
// fetched by scrapeReddit. Only the fields named below are decoded; every
// other key in the JSON document is ignored by encoding/json.
type redditListing struct {
Data struct {
// Children holds one entry per post in the listing.
Children []struct {
Data struct {
// Title is matched against the F1 keyword lists by scrapeReddit.
Title string `json:"title"`
// SelfText is the post body that scrapeReddit scans for URLs.
SelfText string `json:"selftext"`
// Permalink is appended to https://www.reddit.com to build the
// comments URL for the post.
Permalink string `json:"permalink"`
// CreatedUTC is decoded but not read by scrapeReddit.
CreatedUTC float64 `json:"created_utc"`
} `json:"data"`
} `json:"children"`
} `json:"data"`
}
// redditComments is the decode target for a post's ".json" comments
// response: a JSON array of listings, each carrying a list of children.
// walkComments traverses it, including nested replies.
type redditComments []struct {
Data struct {
Children []struct {
Data struct {
// Body is the comment text; walkComments skips empty bodies.
Body string `json:"body"`
// Replies is kept undecoded so walkComments can check its first
// byte and only decode it when it is a JSON object.
Replies json.RawMessage `json:"replies"`
} `json:"data"`
} `json:"children"`
} `json:"data"`
}
// scrapeReddit fetches the subreddit listing, keeps the posts whose title
// passes isF1Post, and collects the URLs found in each matching post's
// selftext and comment tree. Links are de-duplicated by normalizeURL across
// the whole run, first occurrence wins. It returns an error only when the
// listing itself cannot be fetched; a failed comments fetch is logged and
// that post's comments are skipped.
func scrapeReddit() ([]models.ScrapedLink, error) {
	log.Printf("scraper: fetching listing from %s", subredditURL)

	httpClient := &http.Client{Timeout: 15 * time.Second}
	listing, err := fetchJSON[redditListing](httpClient, subredditURL)
	if err != nil {
		return nil, fmt.Errorf("fetch listing: %w", err)
	}

	var collected []models.ScrapedLink
	visited := make(map[string]bool)

	// keepNew appends every candidate whose normalised URL has not been
	// recorded yet during this run.
	keepNew := func(candidates []models.ScrapedLink) {
		for _, c := range candidates {
			key := normalizeURL(c.URL)
			if visited[key] {
				continue
			}
			visited[key] = true
			collected = append(collected, c)
		}
	}

	posts := listing.Data.Children
	matched := 0
	log.Printf("scraper: got %d posts from listing", len(posts))

	for i := range posts {
		post := posts[i].Data
		if !isF1Post(post.Title) {
			log.Printf("scraper: skipped post: %s", truncate(post.Title, 60))
			continue
		}
		matched++
		log.Printf("scraper: matched post: %s", truncate(post.Title, 60))

		fromSelftext := extractURLs(post.SelfText, post.Title)
		log.Printf("scraper: extracted %d URLs from selftext of %q", len(fromSelftext), truncate(post.Title, 40))
		keepNew(fromSelftext)

		// Pause before hitting Reddit again for this post's comments.
		time.Sleep(requestDelay)

		commentsURL := "https://www.reddit.com" + post.Permalink + ".json"
		thread, err := fetchJSONWithRetry[redditComments](httpClient, commentsURL, 3)
		if err != nil {
			log.Printf("scraper: failed to fetch comments for %s: %v", post.Permalink, err)
			continue
		}

		fromComments := 0
		walkComments(*thread, func(body string) {
			found := extractURLs(body, post.Title)
			fromComments += len(found)
			keepNew(found)
		})
		log.Printf("scraper: extracted %d URLs from comments of %q", fromComments, truncate(post.Title, 40))

		time.Sleep(requestDelay)
	}

	log.Printf("scraper: summary — matched %d/%d posts, extracted %d unique URLs", matched, len(posts), len(collected))
	return collected, nil
}
// fetchJSON issues a GET for rawURL with the package User-Agent and decodes
// the response body into a fresh T.
//
// Any status other than 200 yields an error whose text is "status <code>";
// fetchJSONWithRetry matches on that text, so it must stay stable. At most
// 5 MiB of the body is read before decoding.
func fetchJSON[T any](client *http.Client, rawURL string) (*T, error) {
	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	log.Printf("scraper: GET %s -> %d", truncate(rawURL, 80), resp.StatusCode)
	if code := resp.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("status %d", code)
	}

	const maxBody = 5 << 20 // 5 MiB
	payload, err := io.ReadAll(io.LimitReader(resp.Body, maxBody))
	if err != nil {
		return nil, err
	}

	decoded := new(T)
	if err := json.Unmarshal(payload, decoded); err != nil {
		return nil, err
	}
	return decoded, nil
}
// fetchJSONWithRetry calls fetchJSON up to maxRetries+1 times.
//
// Retry policy, keyed on the "status <code>" text produced by fetchJSON:
//   - 429: wait a fixed 30s, then retry.
//   - 502/503: wait 2^attempt seconds (1s, 2s, 4s, ...), then retry.
//   - anything else: return the error immediately.
//
// No wait is performed after the final attempt: once the retry budget is
// spent the last error is returned straight away, wrapped as
// "after <maxRetries> retries: ...". A negative maxRetries is treated as 0
// so that at least one attempt is always made.
func fetchJSONWithRetry[T any](client *http.Client, rawURL string, maxRetries int) (*T, error) {
	if maxRetries < 0 {
		maxRetries = 0
	}

	var lastErr error
	for attempt := 0; attempt <= maxRetries; attempt++ {
		result, err := fetchJSON[T](client, rawURL)
		if err == nil {
			return result, nil
		}
		lastErr = err

		errMsg := err.Error()
		rateLimited := strings.Contains(errMsg, "status 429")
		serverError := strings.Contains(errMsg, "status 502") || strings.Contains(errMsg, "status 503")
		if !rateLimited && !serverError {
			return nil, err
		}

		// Sleeping is only useful when another attempt will follow.
		if attempt == maxRetries {
			break
		}

		if rateLimited {
			log.Printf("scraper: rate limited on %s, backing off 30s", truncate(rawURL, 60))
			time.Sleep(30 * time.Second)
			continue
		}

		backoff := time.Duration(math.Pow(2, float64(attempt))) * time.Second
		log.Printf("scraper: server error on %s, retry %d/%d in %v", truncate(rawURL, 60), attempt+1, maxRetries, backoff)
		time.Sleep(backoff)
	}
	return nil, fmt.Errorf("after %d retries: %w", maxRetries, lastErr)
}
// streamTLDs is the regex alternation of top-level domains that
// deobfuscateText recognises as the end of a domain name.
const streamTLDs = `(?:com|net|org|xyz|link|info|live|tv|me|cc|to|io|co|stream|site|fun|top|club|watch|racing)`

// The patterns are compiled once at package initialisation rather than on
// every deobfuscateText call.
var (
	// dotWordRe matches "<name> dot <tld>" (case-insensitive).
	dotWordRe = regexp.MustCompile(`(?i)(\b\w[\w-]*)\s+dot\s+(` + streamTLDs + `\b)`)
	// spaceDotRe matches "<name> . <tld>" with optional whitespace around the dot.
	spaceDotRe = regexp.MustCompile(`(\b\w[\w-]*)\s*\.\s*(` + streamTLDs + `\b)`)
	// bareDomainRe matches a scheme-less "<name>.<tld>[/path]" at the start
	// of the text or right after whitespace, '(', '>' or '['. Group 1 is
	// that leading character (empty at start of text), group 2 the domain
	// and optional path.
	bareDomainRe = regexp.MustCompile(`(^|[\s(>\[])(\w[\w-]*\.` + streamTLDs + `(?:/[^\s)\]<"]*)?)`)
)

// deobfuscateText normalises obfuscated URLs commonly posted on Reddit to
// evade auto-moderation. Examples:
//   - "pitsport . xyz/watch/f1" → "https://pitsport.xyz/watch/f1"
//   - "dlhd dot link" → "https://dlhd.link"
//
// It runs three passes over text:
//  1. " dot " between a word and a known TLD becomes ".".
//  2. Whitespace around a "." before a known TLD is removed.
//  3. "https://" is inserted in front of bare domains so that the URL
//     regex can pick them up. The character preceding the domain is kept
//     as-is, whichever whitespace or bracket character it is.
func deobfuscateText(text string) string {
	text = dotWordRe.ReplaceAllString(text, "${1}.${2}")
	text = spaceDotRe.ReplaceAllString(text, "${1}.${2}")
	return bareDomainRe.ReplaceAllString(text, "${1}https://${2}")
}
// extractURLs de-obfuscates text, finds every http(s) URL in it and returns
// one ScrapedLink per URL whose host is not in filteredDomains. postTitle is
// recorded as the link title and used in log output.
//
// Trailing punctuation (".,;:!?)") is stripped from each match. The host is
// lower-cased before the filteredDomains lookup because hostnames are
// case-insensitive while the map keys are lower-case. Each link gets a
// random 16-byte ID rendered as hex. The result is nil when no link
// qualifies.
func extractURLs(text, postTitle string) []models.ScrapedLink {
	text = deobfuscateText(text)
	matches := urlRe.FindAllString(text, -1)

	var links []models.ScrapedLink
	filtered := 0
	for _, u := range matches {
		u = strings.TrimRight(u, ".,;:!?)")
		parsed, err := url.Parse(u)
		if err != nil {
			continue
		}
		if filteredDomains[strings.ToLower(parsed.Hostname())] {
			filtered++
			continue
		}
		id := make([]byte, 16)
		if _, err := rand.Read(id); err != nil {
			// Without an ID the link cannot be stored; report it rather
			// than dropping it silently.
			log.Printf("scraper: failed to generate link ID: %v", err)
			continue
		}
		links = append(links, models.ScrapedLink{
			ID:        fmt.Sprintf("%x", id),
			URL:       u,
			Title:     postTitle,
			Source:    "r/motorsportsstreams2",
			ScrapedAt: time.Now(),
		})
	}
	if filtered > 0 {
		log.Printf("scraper: filtered %d URLs from known domains in %q", filtered, truncate(postTitle, 40))
	}
	return links
}
// walkComments calls fn with the body of every comment in comments that has
// a non-empty body, descending depth-first into replies: a comment's body is
// visited before its replies, and its replies before the next sibling.
//
// Replies are only followed when the raw value starts with '{', i.e. is a
// JSON object. Replies that fail to decode are skipped without error.
func walkComments(comments redditComments, fn func(string)) {
	for i := range comments {
		children := comments[i].Data.Children
		for j := range children {
			entry := children[j].Data
			if body := entry.Body; body != "" {
				fn(body)
			}

			raw := entry.Replies
			if len(raw) == 0 || raw[0] != '{' {
				continue
			}

			// A reply listing is a single object; wrap it in brackets so it
			// decodes as a one-element redditComments.
			wrapped := make([]byte, 0, len(raw)+2)
			wrapped = append(wrapped, '[')
			wrapped = append(wrapped, raw...)
			wrapped = append(wrapped, ']')

			var nested redditComments
			if err := json.Unmarshal(wrapped, &nested); err != nil {
				continue
			}
			walkComments(nested, fn)
		}
	}
}
// normalizeURL reduces u to a key used for duplicate detection:
// "<scheme>://<lower-cased host><path without trailing slashes>". Query
// string and fragment are not part of the key. If u cannot be parsed, the
// whole string is lower-cased and returned instead.
func normalizeURL(u string) string {
	parsed, err := url.Parse(u)
	if err == nil {
		host := strings.ToLower(parsed.Host)
		trimmedPath := strings.TrimRight(parsed.Path, "/")
		return fmt.Sprintf("%s://%s%s", parsed.Scheme, host, trimmedPath)
	}
	return strings.ToLower(u)
}
// isF1Post reports whether title looks like an F1 post: the lower-cased
// title must contain at least one entry of f1Keywords and none of
// f1NegativeKeywords. Matching is by plain substring.
func isF1Post(title string) bool {
	lowered := strings.ToLower(title)

	// mentions reports whether any phrase occurs in the lower-cased title.
	mentions := func(phrases []string) bool {
		for i := range phrases {
			if strings.Contains(lowered, phrases[i]) {
				return true
			}
		}
		return false
	}

	return !mentions(f1NegativeKeywords) && mentions(f1Keywords)
}
// truncate shortens s for log output. Strings of at most maxLen bytes are
// returned unchanged; longer ones are cut to at most maxLen bytes and
// suffixed with "...".
//
// The cut is moved back to the nearest rune boundary so a multi-byte UTF-8
// character is never split. A maxLen of zero or less yields just "..." for
// a non-empty s.
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	cut := 0
	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed maxLen.
	for i := range s {
		if i > maxLen {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}

View file

@ -0,0 +1,105 @@
package scraper
import (
"context"
"log"
"sync"
"time"
"f1-stream/internal/models"
"f1-stream/internal/store"
)
// Scraper periodically scrapes Reddit for stream links and persists the
// merged result through its store. Create one with New and drive it with
// Run; TriggerScrape requests an extra, out-of-schedule scrape.
type Scraper struct {
// store is used by scrape to load, save and publish scraped links.
store *store.Store
// interval is the ticker period used by Run.
interval time.Duration
// validateTimeout is set by New but not read by any method in this file.
// NOTE(review): presumably meant as the timeout for validateLinks — confirm.
validateTimeout time.Duration
// mu is held for the whole of scrape, so scrapes never overlap.
mu sync.Mutex
}
// New returns a Scraper that persists through s, scrapes every interval
// once Run is called, and records validateTimeout for link validation.
func New(s *store.Store, interval time.Duration, validateTimeout time.Duration) *Scraper {
	sc := new(Scraper)
	sc.store = s
	sc.interval = interval
	sc.validateTimeout = validateTimeout
	return sc
}
// Run performs one scrape immediately and then one per interval tick until
// ctx is cancelled. It blocks for its whole lifetime, so callers normally
// start it in its own goroutine. Cancellation is only observed between
// scrapes; a scrape that is already running is not interrupted.
func (s *Scraper) Run(ctx context.Context) {
	log.Printf("scraper: starting with interval %v", s.interval)

	// First pass happens right away instead of waiting for the first tick.
	s.scrape()

	tick := time.NewTicker(s.interval)
	defer tick.Stop()

	for {
		select {
		case <-tick.C:
			s.scrape()
		case <-ctx.Done():
			log.Println("scraper: shutting down")
			return
		}
	}
}
// TriggerScrape starts a scrape in a new goroutine and returns immediately.
// Because scrape takes the Scraper's mutex, a triggered scrape waits for any
// scrape already in progress rather than running alongside it.
func (s *Scraper) TriggerScrape() {
	go func() {
		s.scrape()
	}()
}
// scrape runs one full scrape cycle while holding s.mu:
//
//  1. Scrape Reddit; on failure log and stop.
//  2. Load the stored links and drop those whose title no longer passes
//     isF1Post. A load failure is logged and treated as "nothing stored".
//  3. Append scraped links whose normalised URL is not already present.
//  4. Save the merged list; on failure log and stop.
//  5. Publish every link returned by this scrape (not only the newly added
//     ones) through the store, logging individual failures.
func (s *Scraper) scrape() {
	s.mu.Lock()
	defer s.mu.Unlock()

	began := time.Now()
	elapsed := func() time.Duration {
		return time.Since(began).Round(time.Millisecond)
	}
	log.Println("scraper: starting scrape")

	links, err := scrapeReddit()
	if err != nil {
		log.Printf("scraper: error after %v: %v", elapsed(), err)
		return
	}
	log.Printf("scraper: reddit scrape completed in %v, got %d links", elapsed(), len(links))

	stored, err := s.store.LoadScrapedLinks()
	if err != nil {
		log.Printf("scraper: failed to load existing links: %v", err)
		stored = nil
	}

	// Carry over stored links that still look like F1 content.
	known := make(map[string]bool)
	var merged []models.ScrapedLink
	for _, old := range stored {
		if isF1Post(old.Title) {
			known[normalizeURL(old.URL)] = true
			merged = append(merged, old)
		}
	}

	// Add freshly scraped links that are not stored yet.
	added := 0
	for _, fresh := range links {
		key := normalizeURL(fresh.URL)
		if known[key] {
			continue
		}
		known[key] = true
		merged = append(merged, fresh)
		added++
	}

	if err := s.store.SaveScrapedLinks(merged); err != nil {
		log.Printf("scraper: failed to save: %v", err)
		return
	}

	// Auto-publish the scraped links as streams.
	// NOTE(review): no validation step (e.g. validateLinks) runs here —
	// confirm whether PublishScrapedStream validates on its own.
	for _, l := range links {
		if err := s.store.PublishScrapedStream(l.URL, l.Title); err != nil {
			log.Printf("scraper: failed to auto-publish %s: %v", truncate(l.URL, 80), err)
		}
	}

	log.Printf("scraper: done in %v, added %d new links (total: %d)", elapsed(), added, len(merged))
}

View file

@ -0,0 +1,142 @@
package scraper
import (
"io"
"log"
"net/http"
"strings"
"time"
"f1-stream/internal/models"
)
// videoMarkers lists the substrings whose presence in a page body is taken
// as evidence of a video player or streaming manifest. All entries are
// lower-case; containsVideoMarkers expects an already lower-cased body.
var videoMarkers = []string{
	// HTML5 video element.
	"<video",

	// HLS manifests.
	".m3u8", "application/x-mpegurl", "application/vnd.apple.mpegurl",

	// DASH manifests.
	".mpd", "application/dash+xml",

	// Player libraries.
	"hls.js", "hls.min.js",
	"dash.js", "dash.all.min.js",
	"video.js", "video.min.js", "videojs",
	"jwplayer", "clappr", "flowplayer", "plyr",
	"shaka-player", "mediaelement", "fluidplayer",
}
// videoContentTypes lists the substrings of a lower-cased Content-Type
// header that mark a response as direct video or a streaming manifest, so
// the body does not need to be inspected.
var videoContentTypes = []string{
	"video/",
	"application/x-mpegurl", "application/vnd.apple.mpegurl",
	"application/dash+xml",
}
// validateBodyLimit is the maximum number of body bytes read from an HTML
// page when searching it for video markers.
const validateBodyLimit = 2 << 20 // 2 MiB
// validateLinks probes every link with HasVideoContent and returns, in the
// original order, only those that pass. Discarded links are logged.
//
// All probes share one HTTP client with the given overall timeout. Once
// three redirects have been followed the client stops and hands back the
// last response instead of following further. The result is nil when no
// link passes.
func validateLinks(links []models.ScrapedLink, timeout time.Duration) []models.ScrapedLink {
	const maxRedirects = 3
	stopAfterLimit := func(_ *http.Request, via []*http.Request) error {
		if len(via) < maxRedirects {
			return nil
		}
		return http.ErrUseLastResponse
	}
	client := &http.Client{Timeout: timeout, CheckRedirect: stopAfterLimit}

	var kept []models.ScrapedLink
	for i := range links {
		candidate := links[i]
		if !HasVideoContent(client, candidate.URL) {
			log.Printf("scraper: discarded %s (no video markers)", truncate(candidate.URL, 60))
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}
// HasVideoContent performs a GET for rawURL using client and reports
// whether the response looks like video:
//
//   - a Content-Type matching isDirectVideoContentType returns true without
//     reading the body;
//   - an HTML or XHTML response returns true when its lower-cased body (up
//     to validateBodyLimit bytes) contains a known video marker;
//   - anything else returns false.
//
// Request, transport and read errors are logged and reported as false, as
// is any status outside the 200-399 range.
func HasVideoContent(client *http.Client, rawURL string) bool {
	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
	if err != nil {
		log.Printf("scraper: validate request error for %s: %v", truncate(rawURL, 60), err)
		return false
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := client.Do(req)
	if err != nil {
		log.Printf("scraper: validate fetch error for %s: %v", truncate(rawURL, 60), err)
		return false
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code < 200 || code >= 400 {
		return false
	}

	contentType := strings.ToLower(resp.Header.Get("Content-Type"))
	switch {
	case isDirectVideoContentType(contentType):
		// Direct video response; the body is irrelevant.
		return true
	case strings.Contains(contentType, "text/html"),
		strings.Contains(contentType, "application/xhtml"):
		// HTML page: fall through to the marker scan below.
	default:
		return false
	}

	page, err := io.ReadAll(io.LimitReader(resp.Body, validateBodyLimit))
	if err != nil {
		log.Printf("scraper: validate read error for %s: %v", truncate(rawURL, 60), err)
		return false
	}
	return containsVideoMarkers(strings.ToLower(string(page)))
}
// containsVideoMarkers reports whether loweredBody contains at least one of
// the videoMarkers substrings. The comparison is byte-exact, so the caller
// is responsible for lower-casing the body first.
func containsVideoMarkers(loweredBody string) bool {
	for i := range videoMarkers {
		if strings.Contains(loweredBody, videoMarkers[i]) {
			return true
		}
	}
	return false
}
// isDirectVideoContentType reports whether the Content-Type value ct
// contains one of the videoContentTypes substrings. ct is lower-cased here,
// so callers may pass the header value in any case.
func isDirectVideoContentType(ct string) bool {
	lowered := strings.ToLower(ct)
	for i := range videoContentTypes {
		if strings.Contains(lowered, videoContentTypes[i]) {
			return true
		}
	}
	return false
}

View file

@ -0,0 +1,124 @@
package scraper
import "testing"
// TestContainsVideoMarkers checks containsVideoMarkers against page
// fragments that do and do not embed a video player or manifest. All bodies
// are already lower-case, matching the function's contract.
func TestContainsVideoMarkers(t *testing.T) {
	type markerCase struct {
		name string
		body string
		want bool
	}
	cases := []markerCase{
		// Bodies that must be recognised.
		{"video tag", `<div><video src="stream.mp4"></video></div>`, true},
		{"HLS manifest reference", `var url = "https://cdn.example.com/live.m3u8";`, true},
		{"DASH manifest reference", `<source src="stream.mpd" type="application/dash+xml">`, true},
		{"HLS.js library", `<script src="/js/hls.min.js"></script>`, true},
		{"Video.js library", `<script src="https://cdn.example.com/video.js"></script>`, true},
		{"JW Player", `<div id="jwplayer-container"></div><script>jwplayer("jwplayer-container")</script>`, true},
		{"Clappr player", `<script src="clappr.min.js"></script>`, true},
		{"Flowplayer", `<script>flowplayer("#player")</script>`, true},
		{"Plyr player", `<link rel="stylesheet" href="plyr.css"><script src="plyr.js"></script>`, true},
		{"Shaka Player", `<script src="shaka-player.compiled.js"></script>`, true},

		// Bodies that must be rejected.
		{"plain HTML", `<html><body><p>Hello world</p></body></html>`, false},
		{"reddit link page", `<html><body><a href="https://example.com">Click here</a></body></html>`, false},
		{"blog post", `<html><body><article>F1 race results and analysis...</article></body></html>`, false},
		{"empty string", "", false},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			if got := containsVideoMarkers(tc.body); got != tc.want {
				t.Errorf("containsVideoMarkers(%q) = %v, want %v", truncate(tc.body, 60), got, tc.want)
			}
		})
	}
}
// TestIsDirectVideoContentType checks isDirectVideoContentType against
// Content-Type values that should and should not count as direct video.
func TestIsDirectVideoContentType(t *testing.T) {
	type contentTypeCase struct {
		name string
		ct   string
		want bool
	}
	cases := []contentTypeCase{
		// Content types that must be accepted.
		{"video/mp4", "video/mp4", true},
		{"video/webm", "video/webm", true},
		{"HLS content type", "application/x-mpegurl", true},
		{"Apple HLS content type", "application/vnd.apple.mpegurl", true},
		{"DASH content type", "application/dash+xml", true},
		{"video with params", "video/mp4; charset=utf-8", true},

		// Content types that must be rejected.
		{"text/html", "text/html", false},
		{"application/json", "application/json", false},
		{"image/png", "image/png", false},
		{"text/plain", "text/plain", false},
		{"empty string", "", false},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			if got := isDirectVideoContentType(tc.ct); got != tc.want {
				t.Errorf("isDirectVideoContentType(%q) = %v, want %v", tc.ct, got, tc.want)
			}
		})
	}
}