wrongmove/crawler/3_dump_images.py
2025-05-31 23:50:43 +00:00

36 lines
1.2 KiB
Python

import asyncio
import json
import aiohttp
from tqdm.asyncio import tqdm
from data_access import Listing
# Upper bound on the number of image requests in flight at once; every
# download acquires this semaphore before issuing its HTTP GET.
# Setting this too high either crashes rightmove or gets us blocked.
semaphore = asyncio.Semaphore(value=10)
async def dump_images(listing_paths: list[str]):
    """Fetch floorplan images for every listing resolved from ``listing_paths``.

    The listings come from ``Listing.get_all_listings``; one
    ``dump_images_for_listing`` coroutine is created per listing and all of
    them are awaited together through ``tqdm.gather``.
    """
    every_listing = Listing.get_all_listings(listing_paths)
    # Calling an ``async def`` only builds the coroutine object, so mapping the
    # function over the listings yields the awaitables that gather will run.
    jobs = map(dump_images_for_listing, every_listing)
    await tqdm.gather(*jobs)
async def dump_images_for_listing(listing: Listing):
    """Download the floorplan images of one listing that are not on disk yet.

    Reads the listing's detail JSON, walks ``detail["property"]["floorplans"]``
    and stores each floorplan at ``listing.path_floorplan_file(order, picname)``,
    where ``picname`` is the last path segment of the image URL. Targets that
    already exist are skipped. A failed download is reported through
    ``tqdm.write`` and does not stop the remaining floorplans.

    Args:
        listing: Listing whose floorplans should be fetched.
    """
    with open(listing.path_detail_json()) as f:
        detail = json.load(f)

    # Collect what is missing up front so that no HTTP session is opened for a
    # listing whose floorplans are all present already.
    missing = []
    for photo in detail["property"]["floorplans"]:
        url = photo["url"]
        picname = url.split("/")[-1]
        target = listing.path_floorplan_file(photo["order"], picname)
        if not target.exists():
            missing.append((url, target))
    if not missing:
        return

    # One session for the whole listing rather than one per image, so the
    # connection pool can be reused between downloads.
    async with aiohttp.ClientSession() as session:
        for url, target in missing:
            try:
                # The shared semaphore bounds concurrent requests across all
                # listings being processed.
                async with semaphore:
                    async with session.get(url) as response:
                        if response.status != 200:
                            tqdm.write(f"Error for {url}: {response.status}")
                            continue
                        # Read the full body BEFORE touching the target file:
                        # if the transfer fails, no empty file is left behind
                        # that the exists() check would later mistake for a
                        # completed download.
                        body = await response.read()
                with open(target, "wb") as out:
                    out.write(body)
            except Exception as e:
                # Deliberately broad: this is a best-effort crawl, one bad
                # image must not abort the rest.
                tqdm.write(f"Error for {url}: {e}")