diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py index 2277814..0ef3129 100644 --- a/crawler/3_dump_images.py +++ b/crawler/3_dump_images.py @@ -3,6 +3,7 @@ import json from pathlib import Path import aiohttp from repositories import ListingRepository +from tenacity import retry, wait_random from tqdm.asyncio import tqdm from models import Listing @@ -21,6 +22,7 @@ async def dump_images(repository: ListingRepository, image_base_path: Path): ) +@retry(wait=wait_random(min=1, max=2)) async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None: all_floorplans = listing.additional_info["property"]["floorplans"] for floorplan in all_floorplans: @@ -33,6 +35,8 @@ async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing async with semaphore: async with aiohttp.ClientSession() as session: async with session.get(url) as response: + if response.status == 404: + return None if response.status != 200: raise Exception(f"Error for {url}: {response.status}") floorplan_path.parent.mkdir(parents=True, exist_ok=True)