Replace pandas with stdlib csv, apprise with direct Slack webhook, switch to opencv-headless
- Rewrite csv_exporter.py to use stdlib csv.DictWriter instead of pandas DataFrame
- Rewrite notifications.py to use aiohttp direct Slack webhook instead of apprise
- Switch opencv-python to opencv-python-headless in pyproject.toml
- Move httpx from dev to prod dependencies
- Remove pandas and apprise from mypy ignore_missing_imports
This commit is contained in:
parent
cde3540a1e
commit
3d9550c7f1
3 changed files with 67 additions and 52 deletions
|
|
@ -1,43 +1,62 @@
|
|||
import csv
|
||||
import json
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
|
||||
from models.listing import QueryParameters
|
||||
from repositories.listing_repository import ListingRepository
|
||||
|
||||
|
||||
# Keys stripped from every listing's ``__dict__`` before export.
DROP_COLUMNS = {"_sa_instance_state", "additional_info"}
# Columns that must be present on every row, even when the listing lacks them.
ENSURE_COLUMNS = ("service_charge", "lease_left", "square_meters")


async def export_to_csv(
    repository: ListingRepository,
    output_file: Path,
    query_parameters: QueryParameters | None = None,
) -> None:
    """Export the listings returned by *repository* to a CSV file.

    Each listing's ``__dict__`` becomes one row (minus ``DROP_COLUMNS``),
    enriched with a ``decision`` column looked up in ``data/decisions.json``
    (when that file exists) and a computed ``price_per_sqm`` column. Rows are
    sorted by ``price_per_sqm`` ascending, with rows lacking a value last.

    Args:
        repository: Source of listings; ``get_listings`` is awaited on it.
        output_file: Destination path; overwritten if it already exists.
        query_parameters: Optional filter forwarded to ``get_listings``.

    Returns:
        None. When there are no listings, an empty file is written.
    """
    listings = await repository.get_listings(query_parameters=query_parameters)
    rows = [
        {k: v for k, v in listing.__dict__.items() if k not in DROP_COLUMNS}
        for listing in listings
    ]

    if not rows:
        output_file.write_text("")
        return

    # Read decisions file if present. JSON is UTF-8 by specification, so do
    # not rely on the platform's locale encoding.
    decisions: dict[str, str] = {}
    decisions_path = Path("data/decisions.json")
    if decisions_path.exists():
        with open(decisions_path, encoding="utf-8") as f:
            decisions = json.load(f)

    for row in rows:
        # JSON object keys are always strings, hence the str() on the id.
        row["decision"] = decisions.get(str(row.get("id")))

        # Ensure optional columns exist
        for col in ENSURE_COLUMNS:
            row.setdefault(col, None)

        # Replace -1 sentinel in square_meters with an empty cell
        if row.get("square_meters") == -1:
            row["square_meters"] = None

        # Compute price_per_sqm (guard against zero/missing values)
        sqm = row.get("square_meters")
        price = row.get("price")
        if sqm and sqm > 0 and price:
            row["price_per_sqm"] = round(price / sqm, 2)
        else:
            row["price_per_sqm"] = None

    # Sort by price_per_sqm (None values last)
    rows.sort(key=lambda r: (r["price_per_sqm"] is None, r["price_per_sqm"] or 0))

    # Build the header from the union of keys across ALL rows (first-seen
    # order). An instance __dict__ is not guaranteed to hold the same keys for
    # every listing, and DictWriter raises ValueError on a row containing a
    # key that is missing from fieldnames. Rows lacking a key get an empty
    # cell via DictWriter's default restval.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    # Explicit UTF-8 keeps the output identical across platforms instead of
    # depending on the locale's preferred encoding.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue