reuse query params when exporting to immoweb and allow filtering from available date
This commit is contained in:
parent
a23a5ae192
commit
11315359d2
6 changed files with 10207 additions and 42335 deletions
|
|
@ -1,29 +1,11 @@
|
|||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
import pathlib
|
||||
from rec.query import ListingType, listing_query, FurnishType
|
||||
from rec.query import listing_query, QueryParameters
|
||||
from rec.districts import get_districts
|
||||
from data_access import Listing
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QueryParameters:
    """Immutable description of a single listing search.

    The first six fields have no default and must always be supplied;
    the remaining fields fall back to the defaults declared below.
    """

    # Required search criteria.
    listing_type: ListingType
    min_bedrooms: int
    max_bedrooms: int
    min_price: int
    max_price: int
    district_names: set[str]

    # Optional refinements.
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int = 30
    furnish_types: list[FurnishType] | None = None

    # The values below are not supported by rightmove,
    # hence we apply them after fetching:
    # available from; council tax
|
||||
|
||||
|
||||
async def dump_listings(
|
||||
parameters: QueryParameters,
|
||||
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
||||
|
|
|
|||
|
|
@ -272,10 +272,7 @@ class Listing:
|
|||
return (now - ds).days
|
||||
|
||||
@property
|
||||
def last_seen(self) -> int | None:
|
||||
if not self.path_last_seen_listing().exists():
|
||||
return None
|
||||
|
||||
def last_seen(self) -> int:
|
||||
with open(self.path_last_seen_listing(), "r") as f:
|
||||
datetime_str = json.load(f)
|
||||
dt = datetime.datetime.fromisoformat(datetime_str)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import asyncio
|
||||
from datetime import datetime
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
|
|
@ -8,7 +9,7 @@ import importlib
|
|||
from rec.districts import get_districts
|
||||
from data_access import Listing
|
||||
import csv_exporter
|
||||
from rec.query import ListingType, FurnishType
|
||||
from rec.query import ListingType, FurnishType, QueryParameters
|
||||
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
||||
from ui_exporter import export_immoweb as export_immoweb_ui
|
||||
from functools import wraps
|
||||
|
|
@ -75,6 +76,12 @@ def listing_filter_options(func):
|
|||
),
|
||||
multiple=True,
|
||||
)
|
||||
@click.option(
|
||||
"--available-from",
|
||||
help="Let date available from",
|
||||
default=None,
|
||||
type=click.DateTime(),
|
||||
)
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
return func(*args, **kwargs)
|
||||
|
|
@ -113,9 +120,10 @@ def dump_listings(
|
|||
max_price: int,
|
||||
type: str,
|
||||
furnish_types: list[str],
|
||||
available_from: datetime | None,
|
||||
):
|
||||
data_dir: str = ctx.obj["data_dir"]
|
||||
query_parameters = dump_listings_module.QueryParameters(
|
||||
query_parameters = QueryParameters(
|
||||
listing_type=ListingType[type],
|
||||
district_names=set(district),
|
||||
min_bedrooms=min_bedrooms,
|
||||
|
|
@ -123,6 +131,7 @@ def dump_listings(
|
|||
min_price=min_price,
|
||||
max_price=max_price,
|
||||
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
|
||||
let_date_available_from=available_from,
|
||||
)
|
||||
click.echo(
|
||||
f"Running dump_listings for districts {district}, data dir {data_dir} and parameters: "
|
||||
|
|
@ -257,10 +266,34 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
|
|||
resolve_path=True,
|
||||
),
|
||||
)
|
||||
@listing_filter_options
|
||||
@click.pass_context
|
||||
def export_immoweb(ctx, output_file: str):
|
||||
click.echo(f"Exporting data to {output_file}")
|
||||
asyncio.run(export_immoweb_ui(ctx, output_file))
|
||||
def export_immoweb(
|
||||
ctx,
|
||||
output_file: str,
|
||||
district: list[str],
|
||||
min_bedrooms: int,
|
||||
max_bedrooms: int,
|
||||
min_price: int,
|
||||
max_price: int,
|
||||
type: str,
|
||||
furnish_types: list[str],
|
||||
available_from: datetime | None,
|
||||
):
|
||||
query_parameters = QueryParameters(
|
||||
listing_type=ListingType[type],
|
||||
district_names=set(district),
|
||||
min_bedrooms=min_bedrooms,
|
||||
max_bedrooms=max_bedrooms,
|
||||
min_price=min_price,
|
||||
max_price=max_price,
|
||||
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
|
||||
let_date_available_from=available_from,
|
||||
)
|
||||
click.echo(
|
||||
f"Exporting data to {output_file} that matches the query parameters: {query_parameters}"
|
||||
)
|
||||
asyncio.run(export_immoweb_ui(ctx, output_file, query_parameters))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
# from diskcache import Cache
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import enum
|
||||
from typing import Any
|
||||
import aiohttp
|
||||
from data_access import Listing
|
||||
|
||||
|
||||
class ListingType(enum.StrEnum):
|
||||
|
|
@ -16,6 +19,57 @@ class FurnishType(enum.StrEnum):
|
|||
PART_FURNISHED = "partFurnished"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QueryParameters:
    """Immutable description of a single listing search.

    The first six fields have no default and must always be supplied;
    the remaining fields fall back to the defaults declared below.
    """

    # Required search criteria.
    listing_type: ListingType
    min_bedrooms: int
    max_bedrooms: int
    min_price: int
    max_price: int
    district_names: set[str]

    # Optional refinements.
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int = 30
    furnish_types: list[FurnishType] | None = None

    # The values below are not supported by rightmove,
    # hence we apply them after fetching
    # (available from; council tax).
    let_date_available_from: datetime | None = None
    # Upper bound that filter_listings applies to Listing.last_seen.
    last_seen_days: int = 14
|
||||
|
||||
|
||||
async def filter_listings(
    listings: list[Listing],
    query_parameters: QueryParameters,
) -> list[Listing]:
    """
    Return the listings that satisfy every constraint in ``query_parameters``.

    A listing is dropped as soon as one of these checks fails, in order:
    bedroom count outside ``[min_bedrooms, max_bedrooms]``, price outside
    ``[min_price, max_price]``, ``last_seen`` greater than
    ``last_seen_days``, or a known ``letDateAvailable`` that is earlier
    than ``let_date_available_from``.  The relative order of the input
    is preserved in the result.
    """
    params = query_parameters

    def is_match(candidate: Listing) -> bool:
        # Bedroom bounds are inclusive on both ends.
        if candidate.bedrooms > params.max_bedrooms:
            return False
        if candidate.bedrooms < params.min_bedrooms:
            return False

        # Price bounds are inclusive on both ends.
        if candidate.price < params.min_price:
            return False
        if candidate.price > params.max_price:
            return False

        # Only consulted for listings that passed the checks above.
        if candidate.last_seen > params.last_seen_days:
            return False

        # The availability filter only applies when both sides are known;
        # a listing without a let date is kept.
        available = candidate.letDateAvailable
        if available is None:
            return True
        earliest = params.let_date_available_from
        if earliest is None:
            return True
        return not (available < earliest)

    return [candidate for candidate in listings if is_match(candidate)]
|
||||
|
||||
|
||||
headers = {
|
||||
"Host": "api.rightmove.co.uk",
|
||||
# 'Accept-Encoding': 'gzip, deflate, br',
|
||||
|
|
|
|||
|
|
@ -1,47 +1,53 @@
|
|||
|
||||
import json
|
||||
import pathlib
|
||||
|
||||
from data_access import Listing
|
||||
from rec.query import QueryParameters, filter_listings
|
||||
|
||||
|
||||
async def export_immoweb(
    ctx,
    output_file: str,
    query_parameters: QueryParameters | None = None,
):
    """Export stored listings as a GeoJSON ``FeatureCollection`` for the immoweb UI.

    The written file contains the JSON document prefixed with
    ``var data = ``.

    Args:
        ctx: Object whose ``obj["data_dir"]`` names the data directory;
            listings are loaded from every ``*/listing.json`` below it.
        output_file: Path of the file to write (overwritten if present).
        query_parameters: When given, the loaded listings are passed
            through ``filter_listings`` before export; ``None`` exports
            every listing.
    """
    data_dir = pathlib.Path(ctx.obj["data_dir"])

    # Sort the paths so the listings are loaded in a stable order.
    listing_paths = sorted(data_dir.glob("*/listing.json"))
    listings = Listing.get_all_listings([str(path) for path in listing_paths])

    if query_parameters is not None:
        listings = await filter_listings(listings, query_parameters)

    # Convert listings to immoweb format
    immoweb_listings = []
    for listing in listings:
        immoweb_listings.append(
            {
                "type": "Feature",
                "properties": {
                    "city": "London",  # change me
                    "country": "United Kingdom",
                    "qm": await listing.sqm_ocr(),
                    # NOTE(review): round() raises if price_per_sqm() can
                    # return None -- confirm against Listing.
                    "qmprice": round(await listing.price_per_sqm(), 2),
                    "rooms": listing.bedrooms,
                    "total_price": listing.price,
                    "url": listing.url,
                    # Additional info; the above is GeoJSON format.
                    # Below is everything else we want in the UI.
                    "info": await listing.dict_nicely(),
                },
                "geometry": {
                    "coordinates": [
                        listing.longitude,
                        listing.latitude,
                    ],
                    "type": "Point",
                },
            }
        )

    prefix = "var data = "
    serialized_data = {"type": "FeatureCollection", "features": immoweb_listings}
    result = prefix + json.dumps(serialized_data, indent=4)

    # Explicit encoding so the output does not depend on the platform default.
    pathlib.Path(output_file).write_text(result, encoding="utf-8")
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue