Reuse query parameters when exporting to immoweb and allow filtering by available-from date

This commit is contained in:
Viktor Barzin 2025-06-01 15:17:14 +00:00
parent a23a5ae192
commit 11315359d2
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
6 changed files with 10207 additions and 42335 deletions

View file

@ -1,29 +1,11 @@
import asyncio import asyncio
from dataclasses import dataclass from dataclasses import dataclass
import pathlib import pathlib
from rec.query import ListingType, listing_query, FurnishType from rec.query import listing_query, QueryParameters
from rec.districts import get_districts from rec.districts import get_districts
from data_access import Listing from data_access import Listing
@dataclass(frozen=True)
class QueryParameters:
    """Frozen dataclass bundling the search criteria for a listing query.

    Instances are immutable (``frozen=True``). The first six fields are
    required; the remaining ones carry defaults.
    """

    listing_type: ListingType
    min_bedrooms: int
    max_bedrooms: int
    min_price: int
    max_price: int
    district_names: set[str]
    radius: float = 0  # NOTE(review): unit is not stated here -- confirm against listing_query
    page_size: int = 500  # items per page
    max_days_since_added: int = 30
    furnish_types: list[FurnishType] | None = None
    # Criteria that rightmove does not support (available-from date,
    # council tax) have to be applied after fetching.
async def dump_listings( async def dump_listings(
parameters: QueryParameters, parameters: QueryParameters,
data_dir: pathlib.Path = pathlib.Path("data/rs/"), data_dir: pathlib.Path = pathlib.Path("data/rs/"),

View file

@ -272,10 +272,7 @@ class Listing:
return (now - ds).days return (now - ds).days
@property @property
def last_seen(self) -> int | None: def last_seen(self) -> int:
if not self.path_last_seen_listing().exists():
return None
with open(self.path_last_seen_listing(), "r") as f: with open(self.path_last_seen_listing(), "r") as f:
datetime_str = json.load(f) datetime_str = json.load(f)
dt = datetime.datetime.fromisoformat(datetime_str) dt = datetime.datetime.fromisoformat(datetime_str)

View file

@ -1,4 +1,5 @@
import asyncio import asyncio
from datetime import datetime
import json import json
import os import os
import pathlib import pathlib
@ -8,7 +9,7 @@ import importlib
from rec.districts import get_districts from rec.districts import get_districts
from data_access import Listing from data_access import Listing
import csv_exporter import csv_exporter
from rec.query import ListingType, FurnishType from rec.query import ListingType, FurnishType, QueryParameters
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
from ui_exporter import export_immoweb as export_immoweb_ui from ui_exporter import export_immoweb as export_immoweb_ui
from functools import wraps from functools import wraps
@ -75,6 +76,12 @@ def listing_filter_options(func):
), ),
multiple=True, multiple=True,
) )
@click.option(
"--available-from",
help="Let date available from",
default=None,
type=click.DateTime(),
)
@wraps(func) @wraps(func)
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
return func(*args, **kwargs) return func(*args, **kwargs)
@ -113,9 +120,10 @@ def dump_listings(
max_price: int, max_price: int,
type: str, type: str,
furnish_types: list[str], furnish_types: list[str],
available_from: datetime | None,
): ):
data_dir: str = ctx.obj["data_dir"] data_dir: str = ctx.obj["data_dir"]
query_parameters = dump_listings_module.QueryParameters( query_parameters = QueryParameters(
listing_type=ListingType[type], listing_type=ListingType[type],
district_names=set(district), district_names=set(district),
min_bedrooms=min_bedrooms, min_bedrooms=min_bedrooms,
@ -123,6 +131,7 @@ def dump_listings(
min_price=min_price, min_price=min_price,
max_price=max_price, max_price=max_price,
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types], furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
let_date_available_from=available_from,
) )
click.echo( click.echo(
f"Running dump_listings for districts {district}, data dir {data_dir} and parameters: " f"Running dump_listings for districts {district}, data dir {data_dir} and parameters: "
@ -257,10 +266,34 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
resolve_path=True, resolve_path=True,
), ),
) )
@listing_filter_options
@click.pass_context @click.pass_context
def export_immoweb(ctx, output_file: str): def export_immoweb(
click.echo(f"Exporting data to {output_file}") ctx,
asyncio.run(export_immoweb_ui(ctx, output_file)) output_file: str,
district: list[str],
min_bedrooms: int,
max_bedrooms: int,
min_price: int,
max_price: int,
type: str,
furnish_types: list[str],
available_from: datetime | None,
):
query_parameters = QueryParameters(
listing_type=ListingType[type],
district_names=set(district),
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
min_price=min_price,
max_price=max_price,
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
let_date_available_from=available_from,
)
click.echo(
f"Exporting data to {output_file} that matches the query parameters: {query_parameters}"
)
asyncio.run(export_immoweb_ui(ctx, output_file, query_parameters))
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,8 +1,11 @@
# from diskcache import Cache # from diskcache import Cache
import asyncio import asyncio
from dataclasses import dataclass
from datetime import datetime
import enum import enum
from typing import Any from typing import Any
import aiohttp import aiohttp
from data_access import Listing
class ListingType(enum.StrEnum): class ListingType(enum.StrEnum):
@ -16,6 +19,57 @@ class FurnishType(enum.StrEnum):
PART_FURNISHED = "partFurnished" PART_FURNISHED = "partFurnished"
@dataclass(frozen=True)
class QueryParameters:
    """Frozen dataclass bundling the search criteria for a listing query.

    Instances are immutable (``frozen=True``). The first six fields are
    required; the remaining ones carry defaults.
    """

    listing_type: ListingType
    min_bedrooms: int
    max_bedrooms: int
    min_price: int
    max_price: int
    district_names: set[str]
    radius: float = 0  # NOTE(review): unit is not stated here -- confirm against the query code
    page_size: int = 500  # items per page
    max_days_since_added: int = 30
    furnish_types: list[FurnishType] | None = None
    # The values below are not supported by rightmove,
    # hence we apply them after fetching
    # (available from; council tax).
    # Earliest acceptable availability date; None disables that check.
    let_date_available_from: datetime | None = None
    # Upper bound compared against Listing.last_seen when filtering
    # (presumably a number of days -- confirm against Listing.last_seen).
    last_seen_days: int = 14
def _is_match(candidate: Listing, criteria: QueryParameters) -> bool:
    """Return True when *candidate* passes every check in *criteria*.

    Checks run in a fixed order (bedrooms, price, last seen, availability)
    and stop at the first failure, so later attributes are not read for a
    listing that has already been rejected.
    """
    rooms = candidate.bedrooms
    if rooms > criteria.max_bedrooms or rooms < criteria.min_bedrooms:
        return False

    cost = candidate.price
    if cost < criteria.min_price or cost > criteria.max_price:
        return False

    if candidate.last_seen > criteria.last_seen_days:
        return False

    available = candidate.letDateAvailable
    earliest = criteria.let_date_available_from
    # A missing date on either side means the availability check is skipped.
    if available is None or earliest is None:
        return True
    return not (available < earliest)


async def filter_listings(
    listings: list[Listing],
    query_parameters: QueryParameters,
) -> list[Listing]:
    """
    Keep only the listings that satisfy the given query parameters.

    A listing is dropped when its bedroom count or price falls outside the
    requested bounds, when its ``last_seen`` value exceeds
    ``last_seen_days``, or when both it and the query carry an availability
    date and the listing's date is the earlier one. Input order is preserved.
    """
    return [entry for entry in listings if _is_match(entry, query_parameters)]
headers = { headers = {
"Host": "api.rightmove.co.uk", "Host": "api.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br', # 'Accept-Encoding': 'gzip, deflate, br',

View file

@ -1,47 +1,53 @@
import json import json
import pathlib import pathlib
from data_access import Listing from data_access import Listing
from rec.query import QueryParameters, filter_listings
async def _to_feature(listing: Listing) -> dict:
    """Build the GeoJSON ``Feature`` dict the immoweb UI expects for one listing."""
    return {
        "type": "Feature",
        "properties": {
            "city": "London",  # change me
            "country": "United Kingdom",
            "qm": await listing.sqm_ocr(),
            "qmprice": round(await listing.price_per_sqm(), 2),
            "rooms": listing.bedrooms,
            "total_price": listing.price,
            "url": listing.url,
            # Additional info; the above is GeoJSON format.
            # Below is everything else we want available in the UI.
            "info": await listing.dict_nicely(),
        },
        "geometry": {
            "coordinates": [
                listing.longitude,
                listing.latitude,
            ],
            "type": "Point",
        },
    }


async def export_immoweb(
    ctx,
    output_file: str,
    query_parameters: QueryParameters | None = None,
):
    """Export stored listings as a ``var data = <GeoJSON>`` JavaScript file.

    Listings are discovered by globbing ``*/listing.json`` under
    ``ctx.obj["data_dir"]`` and loaded through ``Listing.get_all_listings``.
    When ``query_parameters`` is given, the loaded listings are first narrowed
    with ``filter_listings``; with ``None`` every listing is exported.

    Args:
        ctx: Object whose ``obj`` mapping provides ``"data_dir"``.
        output_file: Path of the file to (over)write.
        query_parameters: Optional filter criteria.
    """
    data_dir = ctx.obj["data_dir"]

    # sorted() already returns a list, so no intermediate list() is needed.
    listing_paths = sorted(pathlib.Path(data_dir).glob("*/listing.json"))
    listings = Listing.get_all_listings([str(path) for path in listing_paths])

    if query_parameters is not None:
        listings = await filter_listings(listings, query_parameters)

    # Convert listings to immoweb format, one at a time and in order.
    immoweb_listings = [await _to_feature(listing) for listing in listings]

    serialized_data = {"type": "FeatureCollection", "features": immoweb_listings}
    result = "var data = " + json.dumps(serialized_data, indent=4)

    # Explicit encoding keeps the output independent of the platform default.
    pathlib.Path(output_file).write_text(result, encoding="utf-8")

File diff suppressed because it is too large Load diff