import asyncio
from dataclasses import dataclass
import pathlib

from rec.query import listing_query, QueryParameters
from rec.districts import get_districts
from data_access import Listing

# Upper bound handed to the shared semaphore; if too high, rightmove drops
# connections.
_MAX_CONCURRENT_REQUESTS = 5

# Result pages requested for every district.
_PAGES = (1, 2)


async def dump_listings(
    parameters: QueryParameters,
    data_dir: pathlib.Path = pathlib.Path("data/rs/"),
) -> list[Listing]:
    """Query listings for the selected districts and dump each property found.

    One ``listing_query`` call is issued per (district, page) pair and all
    calls are awaited together with ``asyncio.gather``. For every property in
    every non-empty response a ``Listing`` is created and its
    ``dump_listing`` method is called with the raw property data.

    Args:
        parameters: Search filters forwarded to ``listing_query``. When
            ``parameters.district_names`` is truthy, only districts whose
            name is contained in it are queried; otherwise every district
            returned by ``get_districts()`` is queried.
        data_dir: Directory passed to each ``Listing`` (presumably where the
            listing is persisted -- confirm against ``Listing``).

    Returns:
        The ``Listing`` objects created, in the order the responses were
        requested (district order, then page order).

    Raises:
        Any exception raised by a ``listing_query`` call propagates, since
        ``asyncio.gather`` is used without ``return_exceptions``.
    """
    all_districts = get_districts()
    if parameters.district_names:
        districts = {
            district: locid
            for district, locid in all_districts.items()
            if district in parameters.district_names
        }
    else:
        districts = all_districts
    print("Valid districts to scrape:", districts.keys())

    # A single semaphore is shared by every query so that listing_query can
    # bound the number of requests in flight.
    semaphore = asyncio.Semaphore(_MAX_CONCURRENT_REQUESTS)
    json_responses = await asyncio.gather(
        *[
            listing_query(
                page=page,
                channel=parameters.listing_type,
                min_bedrooms=parameters.min_bedrooms,
                max_bedrooms=parameters.max_bedrooms,
                radius=parameters.radius,
                min_price=parameters.min_price,
                max_price=parameters.max_price,
                location_id=locid,
                page_size=parameters.page_size,
                max_days_since_added=parameters.max_days_since_added,
                furnish_types=parameters.furnish_types or [],
                semaphore=semaphore,
            )
            for locid in districts.values()
            for page in _PAGES
        ]
    )

    listings: list[Listing] = []
    for response_json in json_responses:
        total_results = response_json["totalAvailableResults"]
        if total_results == 0:
            print("No results found")
            continue
        print("totalAvailableResults: ", total_results)

        for property_json in response_json["properties"]:
            listing = Listing(property_json["identifier"], data_dir=data_dir)
            listing.dump_listing(property_json)
            listings.append(listing)
    return listings