From 4e13dbdb7fb3ddfebc41cd1d0647c2c5d8a1dc75 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 21 Jun 2025 12:04:48 +0000 Subject: [PATCH] Retry transient errors from Rightmove when fetching images; return None on 404 --- crawler/3_dump_images.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crawler/3_dump_images.py b/crawler/3_dump_images.py index 2277814..0ef3129 100644 --- a/crawler/3_dump_images.py +++ b/crawler/3_dump_images.py @@ -3,6 +3,7 @@ import json from pathlib import Path import aiohttp from repositories import ListingRepository +from tenacity import retry, wait_random from tqdm.asyncio import tqdm from models import Listing @@ -21,6 +22,7 @@ async def dump_images(repository: ListingRepository, image_base_path: Path): ) +@retry(wait=wait_random(min=1, max=2)) async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None: all_floorplans = listing.additional_info["property"]["floorplans"] for floorplan in all_floorplans: @@ -33,6 +35,8 @@ async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing async with semaphore: async with aiohttp.ClientSession() as session: async with session.get(url) as response: + if response.status == 404: + return None if response.status != 200: raise Exception(f"Error for {url}: {response.status}") floorplan_path.parent.mkdir(parents=True, exist_ok=True)