"""Image fetcher service - downloads floorplan images for listings.""" import asyncio from pathlib import Path import aiohttp from repositories import ListingRepository from tenacity import retry, stop_after_attempt, wait_random from tqdm.asyncio import tqdm from models import Listing # Setting this too high either crashes rightmove or gets us blocked semaphore = asyncio.Semaphore(5) async def dump_images( repository: ListingRepository, image_base_path: Path = Path("data/rs/"), ) -> None: """Download floorplan images for all listings.""" listings = await repository.get_listings() updated_listings = await tqdm.gather( *[dump_images_for_listing(listing, image_base_path) for listing in listings] ) await repository.upsert_listings( [listing for listing in updated_listings if listing is not None] ) @retry(wait=wait_random(min=1, max=2), stop=stop_after_attempt(3)) async def dump_images_for_listing(listing: Listing, base_path: Path) -> Listing | None: """Download floorplan images for a single listing.""" all_floorplans = listing.additional_info.get("property", {}).get("floorplans", []) for floorplan in all_floorplans: url = floorplan["url"] picname = url.split("/")[-1] floorplan_path = Path(base_path, str(listing.id), "floorplans", picname) if floorplan_path.exists(): continue try: async with semaphore: async with aiohttp.ClientSession() as session: async with session.get(url) as response: if response.status == 404: return None if response.status != 200: raise Exception(f"Error for {url}: {response.status}") floorplan_path.parent.mkdir(parents=True, exist_ok=True) with open(floorplan_path, "wb") as f: f.write(await response.read()) listing.floorplan_image_paths.append(str(floorplan_path)) return listing except Exception as e: tqdm.write(f"Error for {url}: {e}") raise e # raise so that we retry it return None