migrate background tasks to celery
This commit is contained in:
parent
efe3248c07
commit
93129333e6
7 changed files with 106 additions and 101 deletions
45
crawler/tasks/listing_tasks.py
Normal file
45
crawler/tasks/listing_tasks.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import asyncio
|
||||
import importlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import time
|
||||
from typing import Any
|
||||
from celery import Celery, Task
|
||||
from celery_app import app
|
||||
from models.listing import Listing, QueryParameters
|
||||
from repositories.listing_repository import ListingRepository
|
||||
from database import engine
|
||||
from tasks.task_state import TaskStatus
|
||||
|
||||
# The pipeline stage modules have names that start with a digit, so they are
# not valid identifiers for a regular ``import`` statement and must be loaded
# by name through importlib.
dump_listings_module = importlib.import_module("1_dump_listings")
dump_images_module = importlib.import_module("3_dump_images")
detect_floorplan_module = importlib.import_module("4_detect_floorplan")

# NOTE(review): this reuses the "uvicorn.error" logger name; confirm that the
# Celery worker process has handlers configured for it, otherwise the debug
# messages emitted by the tasks below may not be visible.
logger = logging.getLogger("uvicorn.error")
|
||||
|
||||
|
||||
@app.task(bind=True)
def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
    """Celery entry point that runs the full listing dump pipeline.

    Args:
        self: The bound task instance (``bind=True``); it is handed to the
            pipeline coroutine.
        parameters_json: JSON text that is validated into ``QueryParameters``
            via ``QueryParameters.model_validate_json``.

    Returns:
        ``{"progress": 1}`` once the pipeline coroutine has finished. The
        listings returned by the pipeline itself are discarded.
    """
    query = QueryParameters.model_validate_json(parameters_json)
    # The pipeline is a coroutine; asyncio.run drives it to completion on a
    # new event loop from this synchronous task body.
    pipeline = dump_listings_full(self, query)
    asyncio.run(pipeline)
    return {"progress": 1}
|
||||
|
||||
|
||||
async def dump_listings_full(self: Task, parameters: QueryParameters) -> list[Listing]:
    """Fetch listings, then their images, then run floorplan detection.

    The task state is updated with a custom state name and a progress
    fraction before the listing fetch and after each of the first two stages.

    Args:
        self: Task object whose ``update_state`` is used to publish progress.
        parameters: Query parameters passed to the listing dump and to the
            final repository lookup.

    Returns:
        The listings re-read from the repository whose ``id`` is contained in
        the value returned by the listing dump stage.
    """

    def report(state: str, progress: float) -> None:
        # Single place that shapes the state/meta payload for every stage.
        self.update_state(state=state, meta={"progress": progress})

    report("FETCHING_LISTINGS", 0.1)
    repo = ListingRepository(engine)
    upserted = await dump_listings_module.dump_listings(parameters, repo)

    report("FETCHING_FLOORPLANS", 0.3)
    logger.debug(f"Upserted {len(upserted)} new listings")
    logger.debug("Starting to fetch floorplans")
    await dump_images_module.dump_images(repo)

    report("RUNNING_OCR_ON_FLOORPLANS", 0.6)
    logger.debug("Completed fetching floorplans")
    logger.debug("Starting floorplan detection")
    await detect_floorplan_module.detect_floorplan(repo)
    logger.debug("Completed floorplan detection")

    # Re-read the listings so the returned objects reflect the work done by
    # the image and floorplan stages.
    refreshed = await repo.get_listings(parameters)  # this can be better

    # NOTE(review): the membership test compares ``listing.id`` against the
    # elements returned by dump_listings. This presumably expects a
    # collection of ids; if dump_listings returns Listing objects instead,
    # nothing will match -- confirm against dump_listings.
    matching = []
    for listing in refreshed:
        if listing.id in upserted:
            matching.append(listing)
    return matching
|
||||
8
crawler/tasks/task_state.py
Normal file
8
crawler/tasks/task_state.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
import enum
|
||||
|
||||
|
||||
class TaskStatus(enum.StrEnum):
|
||||
QUEUED = "queued"
|
||||
PROCESSING = "processing"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
Loading…
Add table
Add a link
Reference in a new issue