diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py
index a64b5bf..ecf3626 100644
--- a/crawler/tasks/listing_tasks.py
+++ b/crawler/tasks/listing_tasks.py
@@ -21,8 +21,8 @@ logger = logging.getLogger("uvicorn.error")
 
 @app.task(bind=True, pydantic=True)
 def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]:
     parsed_parameters = QueryParameters.model_validate_json(parameters_json)
-    asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters))
     self.update_state(state="Starting...", meta={"progress": 0})
+    asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters))
     return {"progress": 0}
 
@@ -38,6 +38,7 @@ async def dump_listings_full(
     """Fetches all listings, images as well as detects floorplans"""
     repository = ListingRepository(engine)
 
+    task.update_state(state="Identifying missing listings", meta={"progress": 0})
     missing_ids = await get_missing_listing_ids(parameters, repository)
     logger.info(f"Found {len(missing_ids)} missing listings")
 
@@ -60,7 +61,7 @@ async def dump_listings_and_monitor(
 
     async def monitor() -> None:
         while (progress := sum(task_progress.values())) < len(missing_ids):
-            progress_ratio = progress / len(missing_ids)
+            progress_ratio = round(progress / len(missing_ids), 2)
             logger.error(
                 f"Task progress: {task_progress}% ({progress} out of {len(missing_ids)})"
             )
@@ -151,7 +152,7 @@ async def _fetch_listings_with_semaphore(
 
     # we do 10 queries each with an increment in price range so we send more queries but each
     # has a smaller chance of returning more than 1.5k results
-    number_of_steps = 1
+    number_of_steps = 10
     price_step = parameters.max_price // number_of_steps
 
     for step in range(number_of_steps):