Use query params to filter out models; also make the CSV exporter work with models
This commit is contained in:
parent
80c335ba04
commit
e317d2ec54
5 changed files with 72 additions and 113 deletions
|
|
@ -1,24 +1,26 @@
|
|||
import asyncio
|
||||
from pathlib import Path
|
||||
from data_access import Listing
|
||||
import pandas as pd
|
||||
from rec.query import QueryParameters, filter_listings
|
||||
from rec.query import QueryParameters
|
||||
from repositories.listing_repository import ListingRepository
|
||||
|
||||
|
||||
async def export_to_csv(
|
||||
listings: list[Listing],
|
||||
repository: ListingRepository,
|
||||
output_file: Path,
|
||||
columns: list[str],
|
||||
query_parameters: QueryParameters | None = None,
|
||||
) -> None:
|
||||
if query_parameters is not None:
|
||||
listings = await filter_listings(listings, query_parameters)
|
||||
ds = await asyncio.gather(*[listing.dict_nicely() for listing in listings])
|
||||
listings = await repository.get_listings(query_parameters=query_parameters)
|
||||
ds = [*[listing.__dict__ for listing in listings]]
|
||||
df = pd.DataFrame(ds)
|
||||
|
||||
# read decisions on file
|
||||
decisions_path = "data/decisions.json"
|
||||
decisions = pd.read_json(decisions_path)
|
||||
df.loc[:, "decision"] = df.identifier.apply(lambda x: decisions.get(x))
|
||||
df.loc[:, "decision"] = df.id.apply(lambda x: decisions.get(x))
|
||||
|
||||
# remove the _sa_instance_state and additional_info columns
|
||||
drop_columns = ["_sa_instance_state", "additional_info"]
|
||||
df = df.drop(columns=drop_columns)
|
||||
|
||||
# remove all entries where we didn't calculate a transit time (probably because the distance was too far)
|
||||
# df2 = df[df.travel_time_fastest.notna()]
|
||||
|
|
@ -30,9 +32,15 @@ async def export_to_csv(
|
|||
# s1 = df2
|
||||
|
||||
# fill missing service charge, lease left and area values with a -1 placeholder so the columns can be filtered more easily in Excel
|
||||
if "service_charge" not in df2.columns:
|
||||
df2.loc[:, "service_charge"] = -1
|
||||
df2.loc[:, "service_charge"] = df2.service_charge.fillna(-1)
|
||||
if "lease_left" not in df2.columns:
|
||||
df2.loc[:, "lease_left"] = -1
|
||||
df2.loc[:, "lease_left"] = df2.lease_left.fillna(-1)
|
||||
df2.loc[:, "sqm_ocr"] = df2.sqm_ocr.fillna(-1)
|
||||
if "square_meters" not in df2.columns:
|
||||
df2.loc[:, "square_meters"] = -1
|
||||
df2.loc[:, "square_meters"] = df2.square_meters.fillna(-1)
|
||||
|
||||
df3 = df2
|
||||
# df3 = pd.concat([df2.drop(['travel_time_fastest', 'travel_time_second'], axis=1), s1], axis=1)
|
||||
|
|
@ -40,6 +48,11 @@ async def export_to_csv(
|
|||
df3.shape
|
||||
df4 = df3
|
||||
|
||||
df5 = df4[columns]
|
||||
# df5 = df4[columns]
|
||||
|
||||
# Add some interesting columns
|
||||
df4.loc[:, "price_per_sqm"] = df4.price / df4.square_meters
|
||||
df5 = df4
|
||||
|
||||
df6 = df5.sort_values(by=["price_per_sqm"], ascending=True)
|
||||
df6.to_csv(str(output_file), index=False)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue