Reuse query params when exporting to immoweb and allow filtering by available-from date

This commit is contained in:
Viktor Barzin 2025-06-01 15:17:14 +00:00
parent a23a5ae192
commit 11315359d2
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
6 changed files with 10207 additions and 42335 deletions

View file

@ -1,29 +1,11 @@
import asyncio
from dataclasses import dataclass
import pathlib
from rec.query import ListingType, listing_query, FurnishType
from rec.query import listing_query, QueryParameters
from rec.districts import get_districts
from data_access import Listing
@dataclass(frozen=True)
class QueryParameters:
    """Immutable set of search criteria handed to ``dump_listings``.

    The dataclass is frozen, so fields cannot be reassigned once an
    instance has been constructed.
    """

    # Kind of listing to search for (a ``ListingType`` member).
    listing_type: ListingType
    # Bedroom-count bounds -- presumably inclusive; verify against listing_query.
    min_bedrooms: int
    max_bedrooms: int
    # Price bounds -- presumably inclusive; verify against listing_query.
    min_price: int
    max_price: int
    # Names of the districts the search is restricted to.
    district_names: set[str]
    # Search radius; the unit is not visible here -- TODO confirm.
    radius: float = 0
    page_size: int = 500  # items per page
    # NOTE(review): looks like an age cut-off in days for listings -- confirm
    # against listing_query.
    max_days_since_added: int = 30
    # Furnishing options to restrict the search to; defaults to None.
    furnish_types: list[FurnishType] | None = None

    # The values below are not supported by rightmove
    # hence we apply them after fetching
    # available from; council tax
async def dump_listings(
parameters: QueryParameters,
data_dir: pathlib.Path = pathlib.Path("data/rs/"),

View file

@ -272,10 +272,7 @@ class Listing:
return (now - ds).days
@property
def last_seen(self) -> int | None:
if not self.path_last_seen_listing().exists():
return None
def last_seen(self) -> int:
with open(self.path_last_seen_listing(), "r") as f:
datetime_str = json.load(f)
dt = datetime.datetime.fromisoformat(datetime_str)

View file

@ -1,4 +1,5 @@
import asyncio
from datetime import datetime
import json
import os
import pathlib
@ -8,7 +9,7 @@ import importlib
from rec.districts import get_districts
from data_access import Listing
import csv_exporter
from rec.query import ListingType, FurnishType
from rec.query import ListingType, FurnishType, QueryParameters
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
from ui_exporter import export_immoweb as export_immoweb_ui
from functools import wraps
@ -75,6 +76,12 @@ def listing_filter_options(func):
),
multiple=True,
)
@click.option(
"--available-from",
help="Let date available from",
default=None,
type=click.DateTime(),
)
@wraps(func)
def wrapper(*args, **kwargs):
return func(*args, **kwargs)
@ -113,9 +120,10 @@ def dump_listings(
max_price: int,
type: str,
furnish_types: list[str],
available_from: datetime | None,
):
data_dir: str = ctx.obj["data_dir"]
query_parameters = dump_listings_module.QueryParameters(
query_parameters = QueryParameters(
listing_type=ListingType[type],
district_names=set(district),
min_bedrooms=min_bedrooms,
@ -123,6 +131,7 @@ def dump_listings(
min_price=min_price,
max_price=max_price,
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
let_date_available_from=available_from,
)
click.echo(
f"Running dump_listings for districts {district}, data dir {data_dir} and parameters: "
@ -257,10 +266,34 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
resolve_path=True,
),
)
@listing_filter_options
@click.pass_context
def export_immoweb(ctx, output_file: str):
click.echo(f"Exporting data to {output_file}")
asyncio.run(export_immoweb_ui(ctx, output_file))
def export_immoweb(
    ctx,
    output_file: str,
    district: list[str],
    min_bedrooms: int,
    max_bedrooms: int,
    min_price: int,
    max_price: int,
    type: str,
    furnish_types: list[str],
    available_from: datetime | None,
):
    """Export the listings matching the given filter options to ``output_file``.

    The filter options are bundled into a ``QueryParameters`` instance, the
    export is announced on the console, and the export coroutine is then run
    to completion with ``asyncio.run``.
    """
    # Enum members are looked up by name.
    listing_type = ListingType[type]
    district_names = set(district)
    selected_furnish_types = []
    for furnish_name in furnish_types:
        selected_furnish_types.append(FurnishType[furnish_name])

    query_parameters = QueryParameters(
        listing_type=listing_type,
        district_names=district_names,
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
        min_price=min_price,
        max_price=max_price,
        furnish_types=selected_furnish_types,
        let_date_available_from=available_from,
    )

    announcement = f"Exporting data to {output_file} that matches the query parameters: {query_parameters}"
    click.echo(announcement)

    export_coroutine = export_immoweb_ui(ctx, output_file, query_parameters)
    asyncio.run(export_coroutine)
if __name__ == "__main__":

View file

@ -1,8 +1,11 @@
# from diskcache import Cache
import asyncio
from dataclasses import dataclass
from datetime import datetime
import enum
from typing import Any
import aiohttp
from data_access import Listing
class ListingType(enum.StrEnum):
@ -16,6 +19,57 @@ class FurnishType(enum.StrEnum):
PART_FURNISHED = "partFurnished"
@dataclass(frozen=True)
class QueryParameters:
    """Immutable description of a listing search.

    The dataclass is frozen, so fields cannot be reassigned once an
    instance has been constructed. Some fields are only applied after the
    listings have been fetched; see the comments on the individual fields.
    """

    # Kind of listing to search for (a ``ListingType`` member).
    listing_type: ListingType
    # Bedroom-count bounds: ``filter_listings`` drops listings with fewer than
    # ``min_bedrooms`` or more than ``max_bedrooms`` bedrooms.
    min_bedrooms: int
    max_bedrooms: int
    # Price bounds: ``filter_listings`` drops listings priced below
    # ``min_price`` or above ``max_price``.
    min_price: int
    max_price: int
    # Names of the districts the search is restricted to.
    district_names: set[str]
    # Search radius; the unit is not visible here -- TODO confirm.
    radius: float = 0
    page_size: int = 500  # items per page
    # NOTE(review): looks like an age cut-off in days for listings -- confirm
    # against listing_query.
    max_days_since_added: int = 30
    # Furnishing options to restrict the search to; defaults to None.
    furnish_types: list[FurnishType] | None = None

    # The values below are not supported by rightmove
    # hence we apply them after fetching
    # available from; council tax

    # ``filter_listings`` drops listings whose ``letDateAvailable`` is earlier
    # than this value; the check is skipped when either side is None.
    let_date_available_from: datetime | None = None
    # ``filter_listings`` drops listings whose ``last_seen`` value is greater
    # than this.
    last_seen_days: int = 14
async def filter_listings(
    listings: list[Listing],
    query_parameters: QueryParameters,
) -> list[Listing]:
    """Return the listings that pass every check in ``query_parameters``.

    A listing is rejected when any of the following holds (checked in this
    order, stopping at the first hit):

    * its bedroom count lies outside ``min_bedrooms``..``max_bedrooms``;
    * its price lies outside ``min_price``..``max_price``;
    * its ``last_seen`` value is greater than ``last_seen_days``;
    * both the listing's ``letDateAvailable`` and the query's
      ``let_date_available_from`` are set, and the former is earlier.

    The relative order of the surviving listings is preserved.
    """
    params = query_parameters

    def _is_match(candidate) -> bool:
        # Guard clauses mirror the rejection rules listed in the docstring.
        bedrooms_out_of_range = (
            candidate.bedrooms > params.max_bedrooms
            or candidate.bedrooms < params.min_bedrooms
        )
        if bedrooms_out_of_range:
            return False

        price_out_of_range = (
            candidate.price < params.min_price
            or candidate.price > params.max_price
        )
        if price_out_of_range:
            return False

        if candidate.last_seen > params.last_seen_days:
            return False

        # The availability check only applies when both dates are known.
        available_on = candidate.letDateAvailable
        if available_on is None:
            return True
        earliest_wanted = params.let_date_available_from
        if earliest_wanted is None:
            return True
        return not (available_on < earliest_wanted)

    return [candidate for candidate in listings if _is_match(candidate)]
headers = {
"Host": "api.rightmove.co.uk",
# 'Accept-Encoding': 'gzip, deflate, br',

View file

@ -1,47 +1,53 @@
import json
import pathlib
from data_access import Listing
from rec.query import QueryParameters, filter_listings
async def _to_immoweb_feature(listing: Listing) -> dict:
    """Build the GeoJSON ``Feature`` dict describing a single listing."""
    return {
        "type": "Feature",
        "properties": {
            "city": "London",  # change me
            "country": "United Kingdom",
            "qm": await listing.sqm_ocr(),
            "qmprice": round(await listing.price_per_sqm(), 2),
            "rooms": listing.bedrooms,
            "total_price": listing.price,
            "url": listing.url,
            # Additional info beyond the fields above, for display in the UI.
            "info": await listing.dict_nicely(),
        },
        "geometry": {
            # Longitude comes first, then latitude.
            "coordinates": [
                listing.longitude,
                listing.latitude,
            ],
            "type": "Point",
        },
    }


async def export_immoweb(
    ctx,
    output_file: str,
    query_parameters: QueryParameters | None = None,
):
    """Write the stored listings to ``output_file`` as a JavaScript data file.

    Every ``*/listing.json`` found under ``ctx.obj["data_dir"]`` is loaded,
    optionally narrowed with ``filter_listings``, converted to a GeoJSON
    ``FeatureCollection`` and written out prefixed with ``var data = ``.

    Args:
        ctx: Context object whose ``obj["data_dir"]`` names the data directory.
        output_file: Path of the file to write; it is opened in ``"w"`` mode.
        query_parameters: When not None, only listings accepted by
            ``filter_listings`` are exported; when None, all are exported.
    """
    data_dir = ctx.obj["data_dir"]
    output_file_path = pathlib.Path(output_file)

    # Sorting the paths keeps the export order stable between runs.
    listing_paths = sorted(pathlib.Path(data_dir).glob("*/listing.json"))
    listings = Listing.get_all_listings([str(path) for path in listing_paths])
    if query_parameters is not None:
        listings = await filter_listings(listings, query_parameters)

    # Listings are converted one after another, in order.
    immoweb_listings = [await _to_immoweb_feature(listing) for listing in listings]

    prefix = "var data = "
    serialized_data = {"type": "FeatureCollection", "features": immoweb_listings}
    result = prefix + json.dumps(serialized_data, indent=4)
    with open(output_file_path, "w") as f:
        f.write(result)

File diff suppressed because it is too large Load diff