Reduce concurrency when fetching images (semaphore 10 -> 5); cap retries at 3 attempts and add retries to listing_query

This commit is contained in:
Viktor Barzin 2025-07-01 16:12:06 +00:00
parent 59c33428c2
commit 20ff91d663
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
3 changed files with 8 additions and 7 deletions

View file

@ -3,13 +3,13 @@ import json
from pathlib import Path from pathlib import Path
import aiohttp import aiohttp
from repositories import ListingRepository from repositories import ListingRepository
from tenacity import retry, wait_random from tenacity import retry, stop_after_attempt, wait_random
from tqdm.asyncio import tqdm from tqdm.asyncio import tqdm
from models import Listing from models import Listing
# Setting this too high either crashes rightmove or gets us blocked # Setting this too high either crashes rightmove or gets us blocked
semaphore = asyncio.Semaphore(10) semaphore = asyncio.Semaphore(5)
async def dump_images( async def dump_images(
@ -25,7 +25,7 @@ async def dump_images(
) )
@retry(wait=wait_random(min=1, max=2)) @retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None: async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None:
all_floorplans = listing.additional_info["property"]["floorplans"] all_floorplans = listing.additional_info["property"]["floorplans"]
for floorplan in all_floorplans: for floorplan in all_floorplans:

View file

@ -103,12 +103,12 @@ async def get_task_status(
return { return {
"task_id": task_id, "task_id": task_id,
"status": task_result.status, "status": task_result.status,
"result": json.dumps(result), "result": result,
} }
@app.get("/api/get_districts") @app.get("/api/get_districts")
async def get_task_status( async def get_districts(
user: Annotated[User, Depends(get_current_user)], user: Annotated[User, Depends(get_current_user)],
) -> dict[str, str]: ) -> dict[str, str]:
return districts.get_districts() return districts.get_districts()

View file

@ -6,7 +6,7 @@ from typing import Any
import aiohttp import aiohttp
from models.listing import FurnishType, ListingType, QueryParameters from models.listing import FurnishType, ListingType, QueryParameters
from rec import districts from rec import districts
from tenacity import retry, wait_random from tenacity import retry, stop_after_attempt, wait_random
headers = { headers = {
@ -27,7 +27,7 @@ class PropertyType(enum.StrEnum):
TERRACED = "terraced" TERRACED = "terraced"
@retry(wait=wait_random(min=1, max=2)) @retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def detail_query(detail_id: int) -> dict[str, Any]: async def detail_query(detail_id: int) -> dict[str, Any]:
params = { params = {
"apiApplication": "ANDROID", "apiApplication": "ANDROID",
@ -44,6 +44,7 @@ async def detail_query(detail_id: int) -> dict[str, Any]:
return await response.json() return await response.json()
@retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3))
async def listing_query( async def listing_query(
*, *,
page: int, page: int,