2025-05-17 20:13:28 +00:00
|
|
|
from pathlib import Path
|
|
|
|
|
import pandas as pd
|
2026-02-06 20:55:10 +00:00
|
|
|
from models.listing import QueryParameters
|
2025-06-08 17:01:33 +00:00
|
|
|
from repositories.listing_repository import ListingRepository
|
2025-05-17 20:13:28 +00:00
|
|
|
|
|
|
|
|
|
2025-05-20 21:58:08 +00:00
|
|
|
async def export_to_csv(
    repository: ListingRepository,
    output_file: Path,
    query_parameters: QueryParameters | None = None,
) -> None:
    """Export the listings returned by *repository* to a CSV file.

    Each listing's ``__dict__`` becomes one row. The export then:

    * adds a ``decision`` column looked up per listing ``id`` in
      ``data/decisions.json`` (path relative to the current working directory);
    * drops the ``_sa_instance_state`` and ``additional_info`` columns when
      they are present;
    * fills gaps in ``service_charge``, ``lease_left`` and ``square_meters``
      with ``-1`` (creating the column if needed) so Excel filters can
      select the "unknown" rows;
    * adds ``price_per_sqm`` (``price / square_meters``) and sorts ascending
      by it, with rows lacking a positive area left blank and placed last.

    When the repository returns no listings an empty CSV is written and the
    decisions file is not read.

    Args:
        repository: Source of listings; ``get_listings`` is awaited on it.
        output_file: Destination path of the CSV file.
        query_parameters: Optional filter forwarded unchanged to
            ``repository.get_listings``.

    Note:
        ``pd.read_json`` and ``DataFrame.to_csv`` are synchronous calls made
        from inside this coroutine. Errors raised while reading the decisions
        file are not caught here and propagate to the caller.
    """
    missing_marker = -1
    gap_filled_columns = ("service_charge", "lease_left", "square_meters")
    unwanted_columns = ["_sa_instance_state", "additional_info"]

    listings = await repository.get_listings(query_parameters=query_parameters)
    df = pd.DataFrame([vars(listing) for listing in listings])

    if df.empty:
        # A frame built from zero listings has no "id"/"price" columns, so the
        # steps below cannot run; emit an empty file instead of crashing.
        df.to_csv(output_file, index=False)
        return

    # read decisions on file
    # NOTE(review): pd.read_json returns a DataFrame by default, so
    # decisions.get(listing_id) looks up a *column* labelled with the id and
    # yields None when there is none -- confirm this matches the layout of
    # data/decisions.json.
    decisions_path = "data/decisions.json"
    decisions = pd.read_json(decisions_path)
    df["decision"] = df["id"].apply(lambda listing_id: decisions.get(listing_id))

    # Not every listing object carries both columns; ignore the absent ones
    # rather than raising KeyError.
    df = df.drop(columns=unwanted_columns, errors="ignore")

    # fill in gap values for service charge, lease left and area for Excel filters
    for column in gap_filled_columns:
        if column not in df.columns:
            df.loc[:, column] = missing_marker
        df.loc[:, column] = df[column].fillna(missing_marker)

    # Add price per sqm column. The -1 marker (or a zero area) would produce a
    # negative or infinite ratio that sorts ahead of every real listing, so
    # blank out the ratio wherever the area is not a positive number.
    has_area = df["square_meters"] > 0
    df["price_per_sqm"] = (df["price"] / df["square_meters"]).where(has_area)

    df = df.sort_values(by=["price_per_sqm"], ascending=True, na_position="last")
    df.to_csv(output_file, index=False)
|