Add configurable scheduling, UI health/task indicators, and auto-load map with default filters

This commit is contained in:
Viktor Barzin 2026-02-01 17:28:37 +00:00
parent 1c8c3e4657
commit c7ac448f15
18 changed files with 2287 additions and 656 deletions

View file

@@ -1,35 +1,50 @@
import asyncio
import importlib
import itertools
import logging
from typing import Any
from celery import Task
from celery.schedules import crontab
from celery_app import app
from config.schedule_config import SchedulesConfig
from listing_processor import ListingProcessor
from models.listing import Listing, ListingType, QueryParameters
from models.listing import Listing, QueryParameters
from rec.districts import get_districts
from rec.query import listing_query
from repositories.listing_repository import ListingRepository
from database import engine
dump_images_module = importlib.import_module("3_dump_images")
detect_floorplan_module = importlib.import_module("4_detect_floorplan")
from services import image_fetcher, floorplan_detector
from utils.redis_lock import redis_lock
# Log through the "uvicorn.error" logger rather than a module-specific one.
logger = logging.getLogger("uvicorn.error")
# Lock name handed to redis_lock by the scrape entry points in this module.
SCRAPE_LOCK_NAME = "scrape_listings"
@app.task(bind=True, pydantic=True)
def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
    """Celery task: run a full listings dump unless another scrape holds the lock.

    The whole run happens inside ``redis_lock(SCRAPE_LOCK_NAME)``. When the
    lock is not acquired the task reports a ``SKIPPED`` state and returns
    without scraping.

    Args:
        parameters_json: JSON document validated into ``QueryParameters``.

    Returns:
        ``{"progress": 0}`` after a completed run, or
        ``{"status": "skipped", "reason": "another_job_running"}`` when the
        lock was not acquired.
    """
    with redis_lock(SCRAPE_LOCK_NAME) as acquired:
        if not acquired:
            logger.warning("Another scrape job is already running, skipping this execution")
            self.update_state(state="SKIPPED", meta={"reason": "Another scrape job is running"})
            return {"status": "skipped", "reason": "another_job_running"}

        logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
        parsed_parameters = QueryParameters.model_validate_json(parameters_json)
        self.update_state(state="Starting...", meta={"progress": 0})
        # The task body is synchronous, so drive the async pipeline to
        # completion on a fresh event loop while still inside the lock.
        asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters))
        return {"progress": 0}
async def async_dump_listings_task(parameters_json: str) -> dict[str, Any]:
    """Run a full listings dump from async code unless another scrape holds the lock.

    Awaits ``dump_listings_full`` directly instead of going through
    ``asyncio.run``, and passes a freshly constructed ``Task()`` as the task
    argument. The whole run happens inside ``redis_lock(SCRAPE_LOCK_NAME)``.

    Args:
        parameters_json: JSON document validated into ``QueryParameters``.

    Returns:
        ``{"progress": 0}`` after a completed run, or
        ``{"status": "skipped", "reason": "another_job_running"}`` when the
        lock was not acquired.
    """
    with redis_lock(SCRAPE_LOCK_NAME) as acquired:
        if not acquired:
            logger.warning("Another scrape job is already running, skipping this execution")
            return {"status": "skipped", "reason": "another_job_running"}

        logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
        parsed_parameters = QueryParameters.model_validate_json(parameters_json)
        await dump_listings_full(task=Task(), parameters=parsed_parameters)
        return {"progress": 0}
async def dump_listings_full(
@@ -83,19 +98,27 @@ async def dump_listings_and_monitor(
@app.on_after_finalize.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register periodic tasks from environment configuration.

    Loads ``SchedulesConfig.from_env()`` and adds one crontab-based periodic
    ``dump_listings_task`` per enabled schedule. If loading the configuration
    raises ``ValueError`` the error is logged and no periodic task is
    registered.

    Args:
        sender: Object the periodic tasks are added to via
            ``add_periodic_task`` (supplied by the ``on_after_finalize``
            signal).
        **kwargs: Extra signal arguments; unused.
    """
    try:
        config = SchedulesConfig.from_env()
    except ValueError as e:
        logger.error(f"Failed to load schedule configuration: {e}")
        return

    for schedule in config.get_enabled_schedules():
        logger.info(
            f"Registering periodic task: {schedule.name} at {schedule.hour}:{schedule.minute}"
        )
        sender.add_periodic_task(
            crontab(
                minute=schedule.minute,
                hour=schedule.hour,
                day_of_week=schedule.day_of_week,
            ),
            # The task takes its query as a JSON string, so serialise the
            # schedule's query parameters into the task signature.
            dump_listings_task.s(schedule.to_query_parameters().model_dump_json()),
            name=schedule.name,
        )
async def get_ids_to_process(