import asyncio
import json

import aiohttp
from tqdm.asyncio import tqdm

from data_access import Listing

# Setting this too high either crashes rightmove or gets us blocked
semaphore = asyncio.Semaphore(10)


async def dump_images(listing_paths: list[str]):
    """Download the floorplan images for every listing under *listing_paths*.

    The listings are resolved through ``Listing.get_all_listings`` and one
    ``dump_images_for_listing`` coroutine is gathered per listing, with
    progress reported by ``tqdm.gather``.
    """
    listings = Listing.get_all_listings(listing_paths)
    await tqdm.gather(*[dump_images_for_listing(listing) for listing in listings])


async def dump_images_for_listing(listing: Listing):
    """Download every floorplan image of *listing* that is not on disk yet.

    The floorplan entries are read from ``detail["property"]["floorplans"]``
    in the listing's detail JSON file. Each entry supplies a ``url`` and an
    ``order``; the destination comes from ``listing.path_floorplan_file``.
    Entries whose destination already exists are skipped.

    Download failures (including non-200 responses) are reported through
    ``tqdm.write`` and do not stop the remaining floorplans from being tried.
    A missing detail file or missing JSON keys still raise to the caller.
    """
    with open(listing.path_detail_json()) as f:
        detail = json.load(f)

    # Created lazily so that fully-downloaded listings never open a session,
    # and reused for all of this listing's downloads instead of one per photo.
    session = None
    try:
        for photo in detail["property"]["floorplans"]:
            url = photo["url"]
            picname = url.split("/")[-1]
            order = photo["order"]
            target = listing.path_floorplan_file(order, picname)
            if target.exists():
                continue

            try:
                if session is None:
                    session = aiohttp.ClientSession()
                # The module-level semaphore caps concurrent requests.
                async with semaphore:
                    async with session.get(url) as response:
                        if response.status != 200:
                            raise Exception(f"Error for {url}: {response.status}")
                        payload = await response.read()
                # Open the file only once the body is fully in memory: a
                # failed read must not leave an empty file behind, because
                # the exists() check above would then skip it forever.
                with open(target, "wb") as out:
                    out.write(payload)
            except Exception as e:
                tqdm.write(f"Error for {url}: {e}")
    finally:
        if session is not None:
            await session.close()