From 85f1e92ad7e0068201e9021263fbe9071d5f2d7b Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 11 May 2026 19:26:57 +0000 Subject: [PATCH] real-estate-crawler: populate SCRAPE_SCHEDULES (daily RENT + weekly BUY, London 1-2 bed) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires celery-beat to fire two periodic scrapes via the existing in-app SchedulesConfig mechanism. Replaces the empty-string fallback with two inline schedules expressed as Terraform-managed JSON: - london-rent-daily: every day at 03:00 UTC, RENT, London, 1-2 bed, £1900-4000 - london-buy-weekly: every Sunday at 04:00 UTC, BUY, London, 1-2 bed, £400k-1.2M Schedules live in `local.scrape_schedules` (jsonencode'd) rather than Vault — they're configuration, not secrets, and benefit from being version-controlled. The previous Vault-backed lookup (`local.notification_settings["scrape_schedules"]`) was unused. Verified live: new celery-beat pod logs `Registering periodic task: london-rent-daily at 3:0` and `london-buy-weekly at 4:0` immediately after roll-out. Also tightens the comment above the wrongmove-api `auth = "none"` line so it passes the new `scripts/check-ingress-auth-comments.py` guard (pre-existing tech debt that blocked the apply). Co-Authored-By: Claude Opus 4.7 --- stacks/real-estate-crawler/main.tf | 36 ++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/stacks/real-estate-crawler/main.tf b/stacks/real-estate-crawler/main.tf index 269da93a..f2e6a9da 100644 --- a/stacks/real-estate-crawler/main.tf +++ b/stacks/real-estate-crawler/main.tf @@ -79,6 +79,37 @@ data "kubernetes_secret" "eso_secrets" { locals { notification_settings = jsondecode(data.kubernetes_secret.eso_secrets.data["notification_settings"]) + + # Periodic scrape schedules consumed by celery-beat via SCRAPE_SCHEDULES env var. + # Schema: config/schedule_config.py:ScheduleConfig. Cron fields are UTC. + # Daily RENT London 1-2 bed £1900-4000 at 03:00 UTC (~04:00 BST). + # Weekly BUY London 1-2 bed £400k-1.2M at Sun 04:00 UTC. + scrape_schedules = jsonencode([ + { + name = "london-rent-daily" + listing_type = "RENT" + minute = "0" + hour = "3" + day_of_week = "*" + min_bedrooms = 1 + max_bedrooms = 2 + min_price = 1900 + max_price = 4000 + district_names = ["London"] + }, + { + name = "london-buy-weekly" + listing_type = "BUY" + minute = "0" + hour = "4" + day_of_week = "0" + min_bedrooms = 1 + max_bedrooms = 2 + min_price = 400000 + max_price = 1200000 + district_names = ["London"] + }, + ]) } @@ -362,9 +393,10 @@ module "ingress" { module "ingress-api" { source = "../../modules/kubernetes/ingress_factory" - # Wrongmove's public UI is Anubis-fronted (auth=none on the / path); this + # Wrongmove's public UI is Anubis-fronted (auth = "none" on the / path); this # /api ingress serves XHRs from that public UI. Forward-auth here would # break the UI. + # auth = "none": XHR endpoint for the Anubis-fronted public UI; forward-auth would break CORS. auth = "none" dns_type = "proxied" namespace = kubernetes_namespace.realestate-crawler.metadata[0].name @@ -581,7 +613,7 @@ resource "kubernetes_deployment" "realestate-crawler-celery-beat" { } env { name = "SCRAPE_SCHEDULES" - value = try(tostring(local.notification_settings["scrape_schedules"]), "") + value = local.scrape_schedules } volume_mount { name = "data"