2025-05-17 21:55:42 +00:00
|
|
|
import asyncio
|
2025-05-17 20:40:44 +00:00
|
|
|
from dataclasses import dataclass
|
2025-05-14 20:19:08 +00:00
|
|
|
import pathlib
|
2025-05-18 17:22:48 +00:00
|
|
|
from rec.query import ListingType, listing_query, FurnishType
|
2024-03-30 19:23:19 +01:00
|
|
|
from rec.districts import get_districts
|
2024-03-11 14:43:53 +00:00
|
|
|
from data_access import Listing
|
2024-03-10 18:49:39 +00:00
|
|
|
|
2025-05-11 18:59:41 +00:00
|
|
|
|
2025-05-17 20:40:44 +00:00
|
|
|
@dataclass(frozen=True)
class QueryParameters:
    """Immutable bundle of search filters consumed by ``dump_listings``.

    The first six fields are required; the remaining ones carry defaults.
    """

    # Forwarded to ``listing_query`` as its ``channel`` argument.
    listing_type: ListingType

    # Inclusive-looking bedroom bounds; passed through unchanged.
    # NOTE(review): inclusivity is decided by the remote search - confirm.
    min_bedrooms: int
    max_bedrooms: int

    # Price bounds; passed through unchanged.
    min_price: int
    max_price: int

    # Only districts whose name appears here are queried.
    district_names: set[str]

    # Search radius forwarded as-is (units are not visible from this file).
    radius: float = 0

    # Items per page.
    page_size: int = 500

    # Forwarded as ``max_days_since_added``.
    max_days_since_added: int = 30

    # ``None`` is treated as "no furnish filter" (an empty list) by
    # ``dump_listings``.
    furnish_types: list[FurnishType] | None = None

    # Filters that Rightmove's search does not support - e.g. "available
    # from" and council tax - have to be applied after fetching, so they
    # would be declared below this point. No such fields are declared here.
|
2025-05-17 20:40:44 +00:00
|
|
|
|
|
|
|
|
|
2025-05-17 21:55:42 +00:00
|
|
|
async def dump_listings(
    parameters: QueryParameters,
    data_dir: pathlib.Path = pathlib.Path("data/rs/"),
    pages: tuple[int, ...] = (1, 2),
) -> list[Listing]:
    """Query every selected district and dump each returned property.

    Districts are taken from ``get_districts()`` and restricted to the
    names in ``parameters.district_names``. One ``listing_query`` call is
    issued per (district, page) pair and all calls are awaited together
    via ``asyncio.gather``. Every entry in a response's ``"properties"``
    list is wrapped in a ``Listing`` and written with
    ``Listing.dump_listing``.

    Args:
        parameters: Search filters forwarded to ``listing_query``.
        data_dir: Directory handed to each ``Listing``.
        pages: Page numbers requested for every district. Defaults to
            ``(1, 2)``, the values that were previously hard-coded.

    Returns:
        The ``Listing`` objects created, in response order.

    Raises:
        KeyError: If a response lacks ``"totalAvailableResults"`` or
            ``"properties"``, or a property lacks ``"identifier"``.
    """
    wanted_names = parameters.district_names
    districts = {
        district: locid
        for district, locid in get_districts().items()
        if district in wanted_names
    }
    print("Valid districts to scrape:", districts.keys())

    # ``furnish_types`` may be None on the parameters object; the query is
    # always given a list.
    furnish_types = parameters.furnish_types or []

    queries = [
        listing_query(
            page=page,
            channel=parameters.listing_type,
            min_bedrooms=parameters.min_bedrooms,
            max_bedrooms=parameters.max_bedrooms,
            radius=parameters.radius,
            min_price=parameters.min_price,
            max_price=parameters.max_price,
            location_id=locid,
            page_size=parameters.page_size,
            max_days_since_added=parameters.max_days_since_added,
            furnish_types=furnish_types,
        )
        for locid in districts.values()
        for page in pages
    ]
    # With no matching districts (or no pages) this gathers nothing and
    # yields an empty list.
    json_responses = await asyncio.gather(*queries)

    listings = []
    for response_json in json_responses:
        total = response_json["totalAvailableResults"]
        if total == 0:
            print("No results found")
            continue
        print("totalAvailableResults: ", total)

        # Named ``listing_data`` rather than ``property`` so the builtin
        # is not shadowed.
        for listing_data in response_json["properties"]:
            listing = Listing(listing_data["identifier"], data_dir=data_dir)
            listing.dump_listing(listing_data)
            listings.append(listing)

    return listings
|