Add configurable scheduling, UI health/task indicators, and auto-load map with default filters
This commit is contained in:
parent
1c8c3e4657
commit
c7ac448f15
18 changed files with 2287 additions and 656 deletions
|
|
@@ -1,35 +1,50 @@
|
|||
import asyncio
|
||||
import importlib
|
||||
import itertools
|
||||
import logging
|
||||
from typing import Any
|
||||
from celery import Task
|
||||
from celery.schedules import crontab
|
||||
from celery_app import app
|
||||
from config.schedule_config import SchedulesConfig
|
||||
from listing_processor import ListingProcessor
|
||||
from models.listing import Listing, ListingType, QueryParameters
|
||||
from models.listing import Listing, QueryParameters
|
||||
from rec.districts import get_districts
|
||||
from rec.query import listing_query
|
||||
from repositories.listing_repository import ListingRepository
|
||||
from database import engine
|
||||
|
||||
dump_images_module = importlib.import_module("3_dump_images")
|
||||
detect_floorplan_module = importlib.import_module("4_detect_floorplan")
|
||||
from services import image_fetcher, floorplan_detector
|
||||
from utils.redis_lock import redis_lock
|
||||
|
||||
logger = logging.getLogger("uvicorn.error")
|
||||
|
||||
SCRAPE_LOCK_NAME = "scrape_listings"
|
||||
|
||||
|
||||
@app.task(bind=True, pydantic=True)
|
||||
def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
|
||||
parsed_parameters = QueryParameters.model_validate_json(parameters_json)
|
||||
self.update_state(state="Starting...", meta={"progress": 0})
|
||||
asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters))
|
||||
return {"progress": 0}
|
||||
with redis_lock(SCRAPE_LOCK_NAME) as acquired:
|
||||
if not acquired:
|
||||
logger.warning("Another scrape job is already running, skipping this execution")
|
||||
self.update_state(state="SKIPPED", meta={"reason": "Another scrape job is running"})
|
||||
return {"status": "skipped", "reason": "another_job_running"}
|
||||
|
||||
logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
|
||||
parsed_parameters = QueryParameters.model_validate_json(parameters_json)
|
||||
self.update_state(state="Starting...", meta={"progress": 0})
|
||||
asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters))
|
||||
return {"progress": 0}
|
||||
|
||||
|
||||
async def async_dump_listings_task(parameters_json: str) -> dict[str, Any]:
|
||||
parsed_parameters = QueryParameters.model_validate_json(parameters_json)
|
||||
await dump_listings_full(task=Task(), parameters=parsed_parameters)
|
||||
return {"progress": 0}
|
||||
with redis_lock(SCRAPE_LOCK_NAME) as acquired:
|
||||
if not acquired:
|
||||
logger.warning("Another scrape job is already running, skipping this execution")
|
||||
return {"status": "skipped", "reason": "another_job_running"}
|
||||
|
||||
logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}")
|
||||
parsed_parameters = QueryParameters.model_validate_json(parameters_json)
|
||||
await dump_listings_full(task=Task(), parameters=parsed_parameters)
|
||||
return {"progress": 0}
|
||||
|
||||
|
||||
async def dump_listings_full(
|
||||
|
|
@@ -83,19 +98,27 @@ async def dump_listings_and_monitor(
|
|||
|
||||
@app.on_after_finalize.connect
|
||||
def setup_periodic_tasks(sender, **kwargs):
|
||||
sender.add_periodic_task(
|
||||
3600 * 24, # Daily updates
|
||||
dump_listings_task.s(
|
||||
QueryParameters(
|
||||
listing_type=ListingType.RENT,
|
||||
min_bedrooms=2,
|
||||
max_bedrooms=3,
|
||||
min_price=2000,
|
||||
max_price=4000,
|
||||
).model_dump_json()
|
||||
),
|
||||
name="Daily dump of interesting rent listings",
|
||||
)
|
||||
"""Register periodic tasks from environment configuration."""
|
||||
try:
|
||||
config = SchedulesConfig.from_env()
|
||||
except ValueError as e:
|
||||
logger.error(f"Failed to load schedule configuration: {e}")
|
||||
return
|
||||
|
||||
for schedule in config.get_enabled_schedules():
|
||||
logger.info(
|
||||
f"Registering periodic task: {schedule.name} at {schedule.hour}:{schedule.minute}"
|
||||
)
|
||||
|
||||
sender.add_periodic_task(
|
||||
crontab(
|
||||
minute=schedule.minute,
|
||||
hour=schedule.hour,
|
||||
day_of_week=schedule.day_of_week,
|
||||
),
|
||||
dump_listings_task.s(schedule.to_query_parameters().model_dump_json()),
|
||||
name=schedule.name,
|
||||
)
|
||||
|
||||
|
||||
async def get_ids_to_process(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue