from dataclasses import dataclass
import pathlib

from rec.query import ListingType, listing_query
from rec.districts import get_districts
from data_access import Listing


@dataclass(frozen=True)
class QueryParameters:
    """Immutable bundle of the search filters forwarded to ``listing_query``.

    Every field except ``district_names`` is passed straight through as a
    keyword argument of ``listing_query``; ``district_names`` selects which
    entries of ``get_districts()`` are queried.
    """

    listing_type: ListingType  # forwarded as ``channel``
    min_bedrooms: int
    max_bedrooms: int
    min_price: int
    max_price: int
    district_names: set[str]  # only districts with these names are queried
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int | None = None


def dump_listings(
    parameters: QueryParameters,
    data_dir: pathlib.Path = pathlib.Path("data/rs/"),
    *,
    max_pages: int = 2,
) -> list[Listing]:
    """Query every requested district and dump each property returned.

    Args:
        parameters: Search filters; see ``QueryParameters``.
        data_dir: Directory handed to every ``Listing`` that is created.
        max_pages: Highest page number requested per district. Pages are
            requested from 1 upwards; the default of 2 matches the previously
            hard-coded behaviour.

    Returns:
        One ``Listing`` per property seen, in the order encountered. Each has
        already had ``dump_listing`` called with the raw property payload.
    """
    # Keep only the known districts that the caller actually asked for.
    wanted = parameters.district_names
    districts = {
        district: locid
        for district, locid in get_districts().items()
        if district in wanted
    }
    print("Valid districts to scrape:", districts.keys())

    listings: list[Listing] = []
    for district, locid in districts.items():
        print("#### District:", district)
        for page in range(1, max_pages + 1):
            try:
                response_json = listing_query(
                    page=page,
                    channel=parameters.listing_type,
                    min_bedrooms=parameters.min_bedrooms,
                    max_bedrooms=parameters.max_bedrooms,
                    radius=parameters.radius,
                    min_price=parameters.min_price,
                    max_price=parameters.max_price,
                    location_id=locid,
                    page_size=parameters.page_size,
                    max_days_since_added=parameters.max_days_since_added,
                )
            except Exception as e:
                # Best effort: a failed query abandons the remaining pages of
                # this district only; the other districts are still processed.
                print(e)
                break

            if page == 1:
                print(
                    "totalAvailableResults: ",
                    response_json["totalAvailableResults"],
                )

            properties = response_json["properties"]
            if not properties:
                # An empty page means there is nothing further to fetch here.
                break

            print(f"page {page}", end=", ", flush=True)
            for item in properties:
                listing = Listing(item["identifier"], data_dir=data_dir)
                listing.dump_listing(item)
                listings.append(listing)
        print()  # break line as we used end=, above.
    return listings


def main(parameters: QueryParameters | None = None) -> None:
    """Run ``dump_listings`` for ``parameters``.

    Args:
        parameters: The query to run. There is no built-in default query, so
            this must be supplied.

    Raises:
        TypeError: If ``parameters`` is omitted. The previous implementation
            called ``dump_listings()`` with no arguments and therefore always
            failed with a ``TypeError``; the exception type is kept and the
            message made explicit.
    """
    if parameters is None:
        # NOTE(review): running this module as a script still fails here until
        # a concrete QueryParameters is wired in (e.g. from CLI arguments).
        raise TypeError(
            "main() needs a QueryParameters instance: dump_listings() has no "
            "default query to run"
        )
    dump_listings(parameters)


if __name__ == "__main__":
    main()