45 lines
1.9 KiB
Python
45 lines
1.9 KiB
Python
import asyncio
|
|
import importlib
|
|
import logging
|
|
from pathlib import Path
|
|
import time
|
|
from typing import Any
|
|
from celery import Celery, Task
|
|
from celery_app import app
|
|
from models.listing import Listing, QueryParameters
|
|
from repositories.listing_repository import ListingRepository
|
|
from database import engine
|
|
from tasks.task_state import TaskStatus
|
|
|
|
# The pipeline stage scripts have file names that begin with a digit, so they
# cannot be named in a regular ``import`` statement; they are loaded by string
# name instead. The stages are imported in the order listed.
(
    dump_listings_module,
    dump_images_module,
    detect_floorplan_module,
) = map(
    importlib.import_module,
    ("1_dump_listings", "3_dump_images", "4_detect_floorplan"),
)

# Messages from this module are emitted through the "uvicorn.error" logger
# rather than a logger named after this module.
logger = logging.getLogger("uvicorn.error")
|
|
|
|
|
|
@app.task(bind=True)
def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
    """Celery entry point that runs the full listing pipeline synchronously.

    Args:
        self: The bound task instance (the task is registered with
            ``bind=True``); it is handed to the pipeline so it can publish
            progress updates.
        parameters_json: JSON text that is parsed into ``QueryParameters``
            via ``model_validate_json``.

    Returns:
        ``{"progress": 1}`` once the pipeline coroutine has finished. The
        listings returned by ``dump_listings_full`` are not propagated.
    """
    query = QueryParameters.model_validate_json(parameters_json)

    # The pipeline is a coroutine; drive it to completion on a fresh event
    # loop because this task function itself is synchronous.
    pipeline = dump_listings_full(self, query)
    asyncio.run(pipeline)

    result: dict[str, Any] = {"progress": 1}
    return result
|
|
|
|
|
|
async def dump_listings_full(self: Task, parameters: QueryParameters) -> list[Listing]:
    """Dump listings, dump their images, then run floorplan detection.

    The three stages run strictly one after another. Before each stage the
    task state is updated with a stage name and a ``progress`` fraction
    (0.1, 0.3, 0.6).

    Args:
        self: Task object whose ``update_state`` is used to report progress.
        parameters: Query forwarded to the listing dump and to the final
            ``get_listings`` refresh.

    Returns:
        The listings re-read from the repository after all stages, limited to
        those whose ``id`` is contained in the value returned by
        ``dump_listings``.
        NOTE(review): this presumes ``dump_listings`` returns a collection of
        listing ids -- confirm against ``1_dump_listings``.
    """
    # Stage 1: listings.
    self.update_state(state="FETCHING_LISTINGS", meta={"progress": 0.1})
    repo = ListingRepository(engine)
    upserted = await dump_listings_module.dump_listings(parameters, repo)

    # Stage 2: images / floorplans.
    self.update_state(state="FETCHING_FLOORPLANS", meta={"progress": 0.3})
    upserted_count = len(upserted)
    logger.debug(f"Upserted {upserted_count} new listings")
    logger.debug("Starting to fetch floorplans")
    await dump_images_module.dump_images(repo)

    # Stage 3: floorplan detection.
    self.update_state(state="RUNNING_OCR_ON_FLOORPLANS", meta={"progress": 0.6})
    logger.debug("Completed fetching floorplans")
    logger.debug("Starting floorplan detection")
    await detect_floorplan_module.detect_floorplan(repo)
    logger.debug("Completed floorplan detection")

    # Re-read from the repository so the returned objects reflect whatever
    # the image and detection stages wrote; this can be better.
    refreshed = await repo.get_listings(parameters)

    selected = []
    for listing in refreshed:
        if listing.id in upserted:
            selected.append(listing)
    return selected
|