use query params to filter out models; also make csv exporter work with models
This commit is contained in:
parent
80c335ba04
commit
e317d2ec54
5 changed files with 72 additions and 113 deletions
|
|
@ -1,24 +1,26 @@
|
|||
import asyncio
|
||||
from pathlib import Path
|
||||
from data_access import Listing
|
||||
import pandas as pd
|
||||
from rec.query import QueryParameters, filter_listings
|
||||
from rec.query import QueryParameters
|
||||
from repositories.listing_repository import ListingRepository
|
||||
|
||||
|
||||
async def export_to_csv(
|
||||
listings: list[Listing],
|
||||
repository: ListingRepository,
|
||||
output_file: Path,
|
||||
columns: list[str],
|
||||
query_parameters: QueryParameters | None = None,
|
||||
) -> None:
|
||||
if query_parameters is not None:
|
||||
listings = await filter_listings(listings, query_parameters)
|
||||
ds = await asyncio.gather(*[listing.dict_nicely() for listing in listings])
|
||||
listings = await repository.get_listings(query_parameters=query_parameters)
|
||||
ds = [*[listing.__dict__ for listing in listings]]
|
||||
df = pd.DataFrame(ds)
|
||||
|
||||
# read decisions on file
|
||||
decisions_path = "data/decisions.json"
|
||||
decisions = pd.read_json(decisions_path)
|
||||
df.loc[:, "decision"] = df.identifier.apply(lambda x: decisions.get(x))
|
||||
df.loc[:, "decision"] = df.id.apply(lambda x: decisions.get(x))
|
||||
|
||||
# remove the _sa_instance_state and additional_info columns
|
||||
drop_columns = ["_sa_instance_state", "additional_info"]
|
||||
df = df.drop(columns=drop_columns)
|
||||
|
||||
# remove all entries where we didn't calculate a transit time (probably because the distance was too far)
|
||||
# df2 = df[df.travel_time_fastest.notna()]
|
||||
|
|
@ -30,9 +32,15 @@ async def export_to_csv(
|
|||
# s1 = df2
|
||||
|
||||
# fill in missing values for service charge and lease left, so that filters work better in Excel
|
||||
if "service_charge" not in df2.columns:
|
||||
df2.loc[:, "service_charge"] = -1
|
||||
df2.loc[:, "service_charge"] = df2.service_charge.fillna(-1)
|
||||
if "lease_left" not in df2.columns:
|
||||
df2.loc[:, "lease_left"] = -1
|
||||
df2.loc[:, "lease_left"] = df2.lease_left.fillna(-1)
|
||||
df2.loc[:, "sqm_ocr"] = df2.sqm_ocr.fillna(-1)
|
||||
if "square_meters" not in df2.columns:
|
||||
df2.loc[:, "square_meters"] = -1
|
||||
df2.loc[:, "square_meters"] = df2.square_meters.fillna(-1)
|
||||
|
||||
df3 = df2
|
||||
# df3 = pd.concat([df2.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1], axis=1)
|
||||
|
|
@ -40,6 +48,11 @@ async def export_to_csv(
|
|||
df3.shape
|
||||
df4 = df3
|
||||
|
||||
df5 = df4[columns]
|
||||
# df5 = df4[columns]
|
||||
|
||||
# Add some interesting columns
|
||||
df4.loc[:, "price_per_sqm"] = df4.price / df4.square_meters
|
||||
df5 = df4
|
||||
|
||||
df6 = df5.sort_values(by=["price_per_sqm"], ascending=True)
|
||||
df6.to_csv(str(output_file), index=False)
|
||||
|
|
|
|||
|
|
@ -6,10 +6,10 @@ import pathlib
|
|||
import click
|
||||
import importlib
|
||||
|
||||
from models.listing import FurnishType, ListingType, QueryParameters
|
||||
from rec.districts import get_districts
|
||||
from data_access import Listing
|
||||
import csv_exporter
|
||||
from rec.query import ListingType, FurnishType, QueryParameters
|
||||
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
||||
from repositories.listing_repository import ListingRepository
|
||||
from ui_exporter import export_immoweb as export_immoweb_ui
|
||||
|
|
@ -230,17 +230,18 @@ def routing(
|
|||
|
||||
|
||||
@cli.command()
|
||||
@click.option(
|
||||
"--columns",
|
||||
"-C",
|
||||
help="Columns to include in the CSV file",
|
||||
type=click.Choice(
|
||||
Listing.ALL_COLUMNS,
|
||||
case_sensitive=False,
|
||||
),
|
||||
multiple=True,
|
||||
default=Listing.ALL_COLUMNS,
|
||||
)
|
||||
# @click.option(
|
||||
# "--columns",
|
||||
# "-C",
|
||||
# help="Columns to include in the CSV file",
|
||||
# type=click.Choice(
|
||||
# # csv_exporter.get_columns_from_listings(),
|
||||
# [1],
|
||||
# case_sensitive=False,
|
||||
# ),
|
||||
# multiple=True,
|
||||
# default=Listing.ALL_COLUMNS,
|
||||
# )
|
||||
@click.option(
|
||||
"--output-file",
|
||||
"-O",
|
||||
|
|
@ -258,7 +259,7 @@ def routing(
|
|||
def export_csv(
|
||||
ctx: click.core.Context,
|
||||
output_file: str,
|
||||
columns: tuple[str],
|
||||
# columns: tuple[str],
|
||||
district: list[str],
|
||||
min_bedrooms: int,
|
||||
max_bedrooms: int,
|
||||
|
|
@ -270,6 +271,7 @@ def export_csv(
|
|||
last_seen_days: int,
|
||||
min_sqm: int | None = None,
|
||||
):
|
||||
# use model
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
query_parameters = QueryParameters(
|
||||
listing_type=ListingType[type],
|
||||
|
|
@ -287,13 +289,12 @@ def export_csv(
|
|||
f"Exporting data to {output_file} using {data_dir=} and query parameters: {query_parameters}"
|
||||
)
|
||||
output_file_path = pathlib.Path(output_file)
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
listings = Listing.get_all_listings([path for path in listing_paths])
|
||||
repository = ListingRepository(engine=engine)
|
||||
asyncio.run(
|
||||
csv_exporter.export_to_csv(
|
||||
listings,
|
||||
repository,
|
||||
output_file_path,
|
||||
list(columns),
|
||||
# list(columns),
|
||||
query_parameters=query_parameters,
|
||||
),
|
||||
)
|
||||
|
|
@ -328,6 +329,7 @@ def export_immoweb(
|
|||
last_seen_days: int,
|
||||
min_sqm: int | None = None,
|
||||
):
|
||||
# use model
|
||||
query_parameters = QueryParameters(
|
||||
listing_type=ListingType[type],
|
||||
district_names=set(district),
|
||||
|
|
|
|||
|
|
@ -132,7 +132,8 @@ class Listing(SQLModel, table=False):
|
|||
class FurnishType(enum.StrEnum):
|
||||
FURNISHED = "furnished"
|
||||
UNFURNISHED = "unfurnished"
|
||||
PART_FURNISHED = "partFurnished"
|
||||
PART_FURNISHED = "part furnished"
|
||||
ASK_LANDLORD = "ask landlord"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
|
|
@ -156,25 +157,6 @@ class DestinationMode:
|
|||
def __hash__(self) -> int:
|
||||
return hash((self.destination_address, self.travel_mode))
|
||||
|
||||
# def to_dict(self) -> dict[str, str | routing.TravelMode]:
|
||||
# return {
|
||||
# "destination_address": self.destination_address,
|
||||
# "travel_mode": self.travel_mode.value,
|
||||
# }
|
||||
|
||||
# @classmethod
|
||||
# def from_dict(cls, data: dict):
|
||||
# return cls(
|
||||
# destination_address=data["destination_address"],
|
||||
# travel_mode=routing.TravelMode(data["travel_mode"]),
|
||||
# )
|
||||
|
||||
# def __json__(self) -> dict[str, str | routing.TravelMode]:
|
||||
# return {
|
||||
# "destination_address": self.destination_address,
|
||||
# "travel_mode": self.travel_mode.value,
|
||||
# }
|
||||
|
||||
def __getstate__(self):
|
||||
# This allows serializers to pick up a dict representation
|
||||
return asdict(self)
|
||||
|
|
@ -182,3 +164,28 @@ class DestinationMode:
|
|||
def __iter__(self):
|
||||
# Makes it behave like a dict when expected
|
||||
return iter(asdict(self).items())
|
||||
|
||||
|
||||
class ListingType(enum.StrEnum):
|
||||
BUY = "BUY"
|
||||
RENT = "RENT"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class QueryParameters:
|
||||
listing_type: ListingType
|
||||
min_bedrooms: int = 1
|
||||
max_bedrooms: int = 999
|
||||
min_price: int = 0
|
||||
max_price: int = 10_000_000
|
||||
district_names: set[str] = dataclasses.field(default_factory=set)
|
||||
radius: float = 0
|
||||
page_size: int = 500 # items per page
|
||||
max_days_since_added: int = 30
|
||||
furnish_types: list[FurnishType] | None = None
|
||||
# The values below are not supported by rightmove
|
||||
# hence we apply them after fetching
|
||||
# available from; council tax
|
||||
let_date_available_from: datetime | None = None
|
||||
last_seen_days: int | None = None
|
||||
min_sqm: int | None = None
|
||||
|
|
|
|||
|
|
@ -6,70 +6,7 @@ import enum
|
|||
from typing import Any
|
||||
import aiohttp
|
||||
from data_access import Listing
|
||||
from models.listing import FurnishType
|
||||
|
||||
|
||||
class ListingType(enum.StrEnum):
|
||||
BUY = "BUY"
|
||||
RENT = "RENT"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class QueryParameters:
|
||||
listing_type: ListingType
|
||||
min_bedrooms: int
|
||||
max_bedrooms: int
|
||||
min_price: int
|
||||
max_price: int
|
||||
district_names: set[str]
|
||||
radius: float = 0
|
||||
page_size: int = 500 # items per page
|
||||
max_days_since_added: int = 30
|
||||
furnish_types: list[FurnishType] | None = None
|
||||
# The values below are not supported by rightmove
|
||||
# hence we apply them after fetching
|
||||
# available from; council tax
|
||||
let_date_available_from: datetime | None = None
|
||||
last_seen_days: int | None = None
|
||||
min_sqm: int | None = None
|
||||
|
||||
|
||||
async def filter_listings(
|
||||
listings: list[Listing],
|
||||
query_parameters: QueryParameters,
|
||||
) -> list[Listing]:
|
||||
"""
|
||||
Filter listings based on the provided query parameters.
|
||||
"""
|
||||
filtered_listings = []
|
||||
for listing in listings:
|
||||
if (
|
||||
listing.bedrooms > query_parameters.max_bedrooms
|
||||
or listing.bedrooms < query_parameters.min_bedrooms
|
||||
):
|
||||
continue
|
||||
if (
|
||||
listing.price < query_parameters.min_price
|
||||
or listing.price > query_parameters.max_price
|
||||
):
|
||||
continue
|
||||
if (
|
||||
query_parameters.last_seen_days is not None
|
||||
and listing.last_seen > query_parameters.last_seen_days
|
||||
):
|
||||
continue
|
||||
if (
|
||||
listing.letDateAvailable is not None
|
||||
and query_parameters.let_date_available_from is not None
|
||||
and listing.letDateAvailable < query_parameters.let_date_available_from
|
||||
):
|
||||
continue
|
||||
sqm_ocr = await listing.sqm_ocr() or 0
|
||||
if query_parameters.min_sqm is not None and sqm_ocr < query_parameters.min_sqm:
|
||||
continue
|
||||
filtered_listings.append(listing)
|
||||
|
||||
return filtered_listings
|
||||
from models.listing import FurnishType, ListingType, QueryParameters
|
||||
|
||||
|
||||
headers = {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import json
|
|||
import pathlib
|
||||
|
||||
from data_access import Listing
|
||||
from rec.query import QueryParameters, filter_listings
|
||||
from rec.query import QueryParameters
|
||||
|
||||
|
||||
async def export_immoweb(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue