Reuse query parameters when exporting to immoweb and allow filtering by available-from date
This commit is contained in:
parent
a23a5ae192
commit
11315359d2
6 changed files with 10207 additions and 42335 deletions
|
|
@ -1,29 +1,11 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import pathlib
|
import pathlib
|
||||||
from rec.query import ListingType, listing_query, FurnishType
|
from rec.query import listing_query, QueryParameters
|
||||||
from rec.districts import get_districts
|
from rec.districts import get_districts
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class QueryParameters:
    """Immutable bundle of search criteria for a listing query.

    The first six fields are required; the remaining ones carry defaults.
    """

    # --- required criteria ---
    listing_type: ListingType
    min_bedrooms: int
    max_bedrooms: int
    min_price: int
    max_price: int
    district_names: set[str]

    # --- optional criteria ---
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int = 30
    furnish_types: list[FurnishType] | None = None

    # Criteria that rightmove does not support (available from; council tax)
    # have to be applied after fetching.
|
|
||||||
|
|
||||||
|
|
||||||
async def dump_listings(
|
async def dump_listings(
|
||||||
parameters: QueryParameters,
|
parameters: QueryParameters,
|
||||||
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
data_dir: pathlib.Path = pathlib.Path("data/rs/"),
|
||||||
|
|
|
||||||
|
|
@ -272,10 +272,7 @@ class Listing:
|
||||||
return (now - ds).days
|
return (now - ds).days
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def last_seen(self) -> int | None:
|
def last_seen(self) -> int:
|
||||||
if not self.path_last_seen_listing().exists():
|
|
||||||
return None
|
|
||||||
|
|
||||||
with open(self.path_last_seen_listing(), "r") as f:
|
with open(self.path_last_seen_listing(), "r") as f:
|
||||||
datetime_str = json.load(f)
|
datetime_str = json.load(f)
|
||||||
dt = datetime.datetime.fromisoformat(datetime_str)
|
dt = datetime.datetime.fromisoformat(datetime_str)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
@ -8,7 +9,7 @@ import importlib
|
||||||
from rec.districts import get_districts
|
from rec.districts import get_districts
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
import csv_exporter
|
import csv_exporter
|
||||||
from rec.query import ListingType, FurnishType
|
from rec.query import ListingType, FurnishType, QueryParameters
|
||||||
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
||||||
from ui_exporter import export_immoweb as export_immoweb_ui
|
from ui_exporter import export_immoweb as export_immoweb_ui
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|
@ -75,6 +76,12 @@ def listing_filter_options(func):
|
||||||
),
|
),
|
||||||
multiple=True,
|
multiple=True,
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--available-from",
|
||||||
|
help="Let date available from",
|
||||||
|
default=None,
|
||||||
|
type=click.DateTime(),
|
||||||
|
)
|
||||||
@wraps(func)
|
@wraps(func)
|
||||||
def wrapper(*args, **kwargs):
|
def wrapper(*args, **kwargs):
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
|
|
@ -113,9 +120,10 @@ def dump_listings(
|
||||||
max_price: int,
|
max_price: int,
|
||||||
type: str,
|
type: str,
|
||||||
furnish_types: list[str],
|
furnish_types: list[str],
|
||||||
|
available_from: datetime | None,
|
||||||
):
|
):
|
||||||
data_dir: str = ctx.obj["data_dir"]
|
data_dir: str = ctx.obj["data_dir"]
|
||||||
query_parameters = dump_listings_module.QueryParameters(
|
query_parameters = QueryParameters(
|
||||||
listing_type=ListingType[type],
|
listing_type=ListingType[type],
|
||||||
district_names=set(district),
|
district_names=set(district),
|
||||||
min_bedrooms=min_bedrooms,
|
min_bedrooms=min_bedrooms,
|
||||||
|
|
@ -123,6 +131,7 @@ def dump_listings(
|
||||||
min_price=min_price,
|
min_price=min_price,
|
||||||
max_price=max_price,
|
max_price=max_price,
|
||||||
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
|
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
|
||||||
|
let_date_available_from=available_from,
|
||||||
)
|
)
|
||||||
click.echo(
|
click.echo(
|
||||||
f"Running dump_listings for districts {district}, data dir {data_dir} and parameters: "
|
f"Running dump_listings for districts {district}, data dir {data_dir} and parameters: "
|
||||||
|
|
@ -257,10 +266,34 @@ def export_csv(ctx: click.core.Context, output_file: str, columns: tuple[str]):
|
||||||
resolve_path=True,
|
resolve_path=True,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
@listing_filter_options
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def export_immoweb(ctx, output_file: str):
|
def export_immoweb(
|
||||||
click.echo(f"Exporting data to {output_file}")
|
ctx,
|
||||||
asyncio.run(export_immoweb_ui(ctx, output_file))
|
output_file: str,
|
||||||
|
district: list[str],
|
||||||
|
min_bedrooms: int,
|
||||||
|
max_bedrooms: int,
|
||||||
|
min_price: int,
|
||||||
|
max_price: int,
|
||||||
|
type: str,
|
||||||
|
furnish_types: list[str],
|
||||||
|
available_from: datetime | None,
|
||||||
|
):
|
||||||
|
query_parameters = QueryParameters(
|
||||||
|
listing_type=ListingType[type],
|
||||||
|
district_names=set(district),
|
||||||
|
min_bedrooms=min_bedrooms,
|
||||||
|
max_bedrooms=max_bedrooms,
|
||||||
|
min_price=min_price,
|
||||||
|
max_price=max_price,
|
||||||
|
furnish_types=[FurnishType[furnish_type] for furnish_type in furnish_types],
|
||||||
|
let_date_available_from=available_from,
|
||||||
|
)
|
||||||
|
click.echo(
|
||||||
|
f"Exporting data to {output_file} that matches the query parameters: {query_parameters}"
|
||||||
|
)
|
||||||
|
asyncio.run(export_immoweb_ui(ctx, output_file, query_parameters))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,11 @@
|
||||||
# from diskcache import Cache
|
# from diskcache import Cache
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
import enum
|
import enum
|
||||||
from typing import Any
|
from typing import Any
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
from data_access import Listing
|
||||||
|
|
||||||
|
|
||||||
class ListingType(enum.StrEnum):
|
class ListingType(enum.StrEnum):
|
||||||
|
|
@ -16,6 +19,57 @@ class FurnishType(enum.StrEnum):
|
||||||
PART_FURNISHED = "partFurnished"
|
PART_FURNISHED = "partFurnished"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class QueryParameters:
    """Immutable bundle of search criteria for a listing query.

    The first six fields are required; the remaining ones carry defaults.
    """

    # --- required criteria ---
    listing_type: ListingType
    min_bedrooms: int
    max_bedrooms: int
    min_price: int
    max_price: int
    district_names: set[str]

    # --- optional criteria ---
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int = 30
    furnish_types: list[FurnishType] | None = None

    # --- criteria applied after fetching ---
    # rightmove does not support these (available from; council tax),
    # so they are checked locally, e.g. by filter_listings.
    let_date_available_from: datetime | None = None
    last_seen_days: int = 14
|
||||||
|
|
||||||
|
|
||||||
|
async def filter_listings(
    listings: list[Listing],
    query_parameters: QueryParameters,
) -> list[Listing]:
    """Return the listings that satisfy ``query_parameters``, in input order.

    A listing is dropped as soon as one of these checks fails (evaluated in
    this order):

    * bedrooms above ``max_bedrooms`` or below ``min_bedrooms``;
    * price below ``min_price`` or above ``max_price``;
    * ``last_seen`` greater than ``last_seen_days``;
    * both the listing's ``letDateAvailable`` and the requested
      ``let_date_available_from`` are set and the former is earlier.

    Nothing is awaited inside; the coroutine signature is kept because
    callers ``await`` this function.
    """
    criteria = query_parameters

    def _passes(candidate: Listing) -> bool:
        # Guard clauses mirror the filter order; later attributes are only
        # read when the earlier checks have passed.
        bedrooms = candidate.bedrooms
        if bedrooms > criteria.max_bedrooms or bedrooms < criteria.min_bedrooms:
            return False

        price = candidate.price
        if price < criteria.min_price or price > criteria.max_price:
            return False

        if candidate.last_seen > criteria.last_seen_days:
            return False

        available = candidate.letDateAvailable
        if available is None:
            # Listings without a let date are never rejected by the date check.
            return True
        wanted_from = criteria.let_date_available_from
        if wanted_from is None:
            return True
        return not (available < wanted_from)

    return [candidate for candidate in listings if _passes(candidate)]
|
||||||
|
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Host": "api.rightmove.co.uk",
|
"Host": "api.rightmove.co.uk",
|
||||||
# 'Accept-Encoding': 'gzip, deflate, br',
|
# 'Accept-Encoding': 'gzip, deflate, br',
|
||||||
|
|
|
||||||
|
|
@ -1,47 +1,53 @@
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
|
from rec.query import QueryParameters, filter_listings
|
||||||
|
|
||||||
|
|
||||||
async def export_immoweb(
    ctx,
    output_file: str,
    query_parameters: QueryParameters | None = None,
):
    """Write the stored listings to ``output_file`` as a JS-wrapped GeoJSON blob.

    Listings are loaded from every ``*/listing.json`` below
    ``ctx.obj["data_dir"]``. When ``query_parameters`` is given, they are
    first narrowed with ``filter_listings``. The file content is the literal
    prefix ``var data = `` followed by a GeoJSON ``FeatureCollection``.
    """
    source_dir = pathlib.Path(ctx.obj["data_dir"])
    destination = pathlib.Path(output_file)

    listing_files = sorted(source_dir.glob("*/listing.json"))
    listings = Listing.get_all_listings([str(entry) for entry in listing_files])

    if query_parameters is not None:
        listings = await filter_listings(listings, query_parameters)

    # Convert each listing into a GeoJSON feature.
    features = []
    for listing in listings:
        properties = {
            "city": "London",  # placeholder value ("change me")
            "country": "United Kingdom",
            "qm": await listing.sqm_ocr(),
            "qmprice": round(await listing.price_per_sqm(), 2),
            "rooms": listing.bedrooms,
            "total_price": listing.price,
            "url": listing.url,
            # Everything above follows the GeoJSON-style layout; "info"
            # carries the additional details wanted in the UI.
            "info": await listing.dict_nicely(),
        }
        geometry = {
            "coordinates": [listing.longitude, listing.latitude],
            "type": "Point",
        }
        features.append(
            {
                "type": "Feature",
                "properties": properties,
                "geometry": geometry,
            }
        )

    collection = {"type": "FeatureCollection", "features": features}
    payload = "var data = " + json.dumps(collection, indent=4)
    with open(destination, "w") as handle:
        handle.write(payload)
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue