infra/stacks/f1-stream/module/files/internal/scraper/scraper.go
Viktor Barzin e225e81ebf
[ci skip] Move Terraform modules into stack directories
Move all 88 service modules (66 individual + 22 platform) from
modules/kubernetes/<service>/ into their corresponding stack directories:

- Service stacks: stacks/<service>/module/
- Platform stack: stacks/platform/modules/<service>/

This collocates module source code with its Terragrunt definition.
Only shared utility modules remain in modules/kubernetes/:
ingress_factory, setup_tls_secret, dockerhub_secret, oauth-proxy.

All cross-references to shared modules updated to use correct
relative paths. Verified with terragrunt run --all -- plan:
0 adds, 0 destroys across all 68 stacks.
2026-02-22 14:38:14 +00:00

105 lines
2.3 KiB
Go

package scraper
import (
"context"
"log"
"sync"
"time"
"f1-stream/internal/models"
"f1-stream/internal/store"
)
// Scraper periodically collects stream links and persists them through a
// store.Store. Because it embeds a sync.Mutex it must be used via pointer and
// never copied.
type Scraper struct {
	// store is where scraped links are loaded from, saved to, and published.
	store *store.Store

	// interval is the period between scheduled scrapes in Run.
	// validateTimeout is kept from New; it is not read anywhere in this part
	// of the file — presumably consumed by link validation elsewhere
	// (TODO confirm).
	interval, validateTimeout time.Duration

	// mu serializes scrape cycles so only one runs at a time.
	mu sync.Mutex
}
// New returns a Scraper that persists through s, scrapes every interval when
// Run is active, and remembers validateTimeout for later use. The returned
// Scraper does nothing until Run or TriggerScrape is called.
func New(s *store.Store, interval time.Duration, validateTimeout time.Duration) *Scraper {
	sc := new(Scraper)
	sc.store = s
	sc.interval = interval
	sc.validateTimeout = validateTimeout
	return sc
}
// Run performs one scrape immediately and then another every s.interval until
// ctx is cancelled. It blocks until ctx is done.
//
// A non-positive interval disables the periodic schedule instead of panicking:
// the initial scrape still happens, after which Run simply waits for ctx to be
// cancelled. TriggerScrape keeps working in that mode.
func (s *Scraper) Run(ctx context.Context) {
	log.Printf("scraper: starting with interval %v", s.interval)

	// Run immediately on start rather than waiting for the first tick.
	s.scrape()

	// time.NewTicker panics when given a duration <= 0, which would take the
	// whole goroutine down on a misconfigured interval. Degrade gracefully.
	if s.interval <= 0 {
		log.Printf("scraper: non-positive interval %v, periodic scraping disabled", s.interval)
		<-ctx.Done()
		log.Println("scraper: shutting down")
		return
	}

	ticker := time.NewTicker(s.interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			log.Println("scraper: shutting down")
			return
		case <-ticker.C:
			s.scrape()
		}
	}
}
// TriggerScrape starts an out-of-schedule scrape in a new goroutine and
// returns without waiting for it. The scrape itself takes the Scraper's mutex,
// so it runs after any cycle that is already in progress.
func (s *Scraper) TriggerScrape() {
	go func() {
		s.scrape()
	}()
}
// scrape performs one full scrape cycle: fetch links from Reddit, merge them
// into the stored link list, save the result, and publish each fetched link as
// a stream.
//
// The mutex is held for the whole cycle, so concurrent callers (the Run loop
// and TriggerScrape goroutines) execute one at a time. Nothing is returned;
// failures are logged. A fetch or save error ends the cycle early, a load
// error is treated as "no existing links", and a publish error only affects
// the link it occurred on.
func (s *Scraper) scrape() {
	// Longest URL prefix, in bytes, included in a publish-failure log line.
	const maxLoggedURLLen = 80

	s.mu.Lock()
	defer s.mu.Unlock()

	start := time.Now()
	log.Println("scraper: starting scrape")

	links, err := scrapeReddit()
	if err != nil {
		log.Printf("scraper: error after %v: %v", time.Since(start).Round(time.Millisecond), err)
		return
	}
	log.Printf("scraper: reddit scrape completed in %v, got %d links", time.Since(start).Round(time.Millisecond), len(links))

	// Merge with existing links, filtering out non-F1 entries.
	existing, err := s.store.LoadScrapedLinks()
	if err != nil {
		// Best effort: continue with only the freshly scraped links.
		// NOTE(review): if SaveScrapedLinks overwrites the stored list, a
		// transient load failure drops previously stored links — confirm
		// this is acceptable.
		log.Printf("scraper: failed to load existing links: %v", err)
		existing = nil
	}

	// seen holds the normalized URL of every link kept so far and is used to
	// skip fresh links that are already stored.
	seen := make(map[string]bool, len(existing)+len(links))

	// Re-check stored links against isF1Post on every cycle and drop the ones
	// that no longer pass. filtered intentionally starts as a nil slice so an
	// empty result is handed to the store exactly as before.
	var filtered []models.ScrapedLink
	for _, l := range existing {
		if !isF1Post(l.Title) {
			continue
		}
		seen[normalizeURL(l.URL)] = true
		filtered = append(filtered, l)
	}
	existing = filtered

	// Append fresh links whose normalized URL has not been seen yet.
	added := 0
	for _, l := range links {
		norm := normalizeURL(l.URL)
		if seen[norm] {
			continue
		}
		seen[norm] = true
		existing = append(existing, l)
		added++
	}

	if err := s.store.SaveScrapedLinks(existing); err != nil {
		log.Printf("scraper: failed to save: %v", err)
		return
	}

	// Auto-publish every link returned by this scrape as a stream. This loop
	// covers all of links, not only the ones counted in added.
	for _, l := range links {
		err := s.store.PublishScrapedStream(l.URL, l.Title)
		if err == nil {
			continue
		}
		u := l.URL
		if len(u) > maxLoggedURLLen {
			// Cut on a rune boundary: slicing at a fixed byte offset could
			// split a multi-byte UTF-8 sequence and log invalid text. Bytes
			// of the form 10xxxxxx are continuation bytes, so step back past
			// them. For ASCII URLs the cut stays at maxLoggedURLLen.
			cut := maxLoggedURLLen
			for cut > 0 && u[cut]&0xC0 == 0x80 {
				cut--
			}
			u = u[:cut] + "..."
		}
		log.Printf("scraper: failed to auto-publish %s: %v", u, err)
	}

	log.Printf("scraper: done in %v, added %d new links (total: %d)", time.Since(start).Round(time.Millisecond), added, len(existing))
}