diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py index 805ab46..f54f24f 100644 --- a/crawler/3_dump_images.py +++ b/crawler/3_dump_images.py @@ -3,13 +3,13 @@ import json from pathlib import Path import aiohttp from repositories import ListingRepository -from tenacity import retry, wait_random +from tenacity import retry, stop_after_attempt, wait_random from tqdm.asyncio import tqdm from models import Listing # Setting this too high either crashes rightmove or gets us blocked -semaphore = asyncio.Semaphore(10) +semaphore = asyncio.Semaphore(5) async def dump_images( @@ -25,7 +25,7 @@ async def dump_images( ) -@retry(wait=wait_random(min=1, max=2)) +@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3)) async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None: all_floorplans = listing.additional_info["property"]["floorplans"] for floorplan in all_floorplans: diff --git a/crawler/api/app.py b/crawler/api/app.py index d9a1506..1a24d0a 100644 --- a/crawler/api/app.py +++ b/crawler/api/app.py @@ -103,12 +103,12 @@ async def get_task_status( return { "task_id": task_id, "status": task_result.status, - "result": json.dumps(result), + "result": result, } @app.get("/api/get_districts") -async def get_task_status( +async def get_districts( user: Annotated[User, Depends(get_current_user)], ) -> dict[str, str]: return districts.get_districts() diff --git a/crawler/rec/query.py b/crawler/rec/query.py index 884ef62..d43cd0a 100644 --- a/crawler/rec/query.py +++ b/crawler/rec/query.py @@ -6,7 +6,7 @@ from typing import Any import aiohttp from models.listing import FurnishType, ListingType, QueryParameters from rec import districts -from tenacity import retry, wait_random +from tenacity import retry, stop_after_attempt, wait_random headers = { @@ -27,7 +27,7 @@ class PropertyType(enum.StrEnum): TERRACED = "terraced" -@retry(wait=wait_random(min=1, max=2)) +@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3)) async def detail_query(detail_id: int) -> dict[str, Any]: params = { "apiApplication": "ANDROID", @@ -44,6 +44,7 @@ async def detail_query(detail_id: int) -> dict[str, Any]: return await response.json() +@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3)) async def listing_query( *, page: int,