diff --git a/crawler/start.sh b/crawler/start.sh index 73d9659..ca7b779 100755 --- a/crawler/start.sh +++ b/crawler/start.sh @@ -23,7 +23,7 @@ case "$ENV_MODE" in prod) echo "🚀 Running in PRODUCTION mode" alembic upgrade head - celery -A celery_app worker & + celery -A celery_app worker --beat & CELERY_PID=$! ;; *) diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py index 59e99f8..205fa64 100644 --- a/crawler/tasks/listing_tasks.py +++ b/crawler/tasks/listing_tasks.py @@ -2,11 +2,10 @@ import asyncio import importlib import logging from pathlib import Path -import time from typing import Any from celery import Celery, Task from celery_app import app -from models.listing import Listing, QueryParameters +from models.listing import FurnishType, Listing, ListingType, QueryParameters from repositories.listing_repository import ListingRepository from database import engine from tasks.task_state import TaskStatus @@ -43,3 +42,20 @@ async def dump_listings_full(self: Task, parameters: QueryParameters) -> list[Li listings = await repository.get_listings(parameters) # this can be better new_listings = [l for l in listings if l.id in new_listings] return new_listings + + +@app.on_after_finalize.connect +def setup_periodic_tasks(sender, **kwargs): + sender.add_periodic_task( + 3600 * 24, # Daily updates + name='Daily dump of interesting rent listings', + dump_listings_task.s( + QueryParameters( + listing_type=ListingType.RENT, + min_bedrooms=2, + max_bedrooms=3, + min_price=2000, + max_price=4000, + ).model_dump_json() + ), + )