2026-02-21 19:47:10 +00:00
|
|
|
import csv
|
|
|
|
|
import json
|
2025-05-17 20:13:28 +00:00
|
|
|
from pathlib import Path
|
2026-02-21 19:47:10 +00:00
|
|
|
|
2026-02-06 20:55:10 +00:00
|
|
|
from models.listing import QueryParameters
|
2025-06-08 17:01:33 +00:00
|
|
|
from repositories.listing_repository import ListingRepository
|
2025-05-17 20:13:28 +00:00
|
|
|
|
|
|
|
|
|
2026-02-21 19:47:10 +00:00
|
|
|
# Keys removed from each listing's ``__dict__`` before it becomes a CSV row.
DROP_COLUMNS = {
    "_sa_instance_state",
    "additional_info",
}
|
|
|
|
|
# Columns every exported row must carry; rows lacking one get it set to None.
ENSURE_COLUMNS = (
    "service_charge",
    "lease_left",
    "square_meters",
)
|
|
|
|
|
|
|
|
|
|
|
2025-05-20 21:58:08 +00:00
|
|
|
def _load_decisions(decisions_path: Path) -> dict[str, str]:
    """Return the mapping stored in *decisions_path*, or ``{}`` if the file is absent."""
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(decisions_path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # The decisions file is optional.
        return {}


def _price_per_sqm(price: float | None, square_meters: float | None) -> float | None:
    """Return price / square_meters rounded to 2 places, or None if either is missing or zero."""
    if square_meters and square_meters > 0 and price:
        return round(price / square_meters, 2)
    return None


async def export_to_csv(
    repository: ListingRepository,
    output_file: Path,
    query_parameters: QueryParameters | None = None,
    *,
    decisions_path: Path = Path("data/decisions.json"),
) -> None:
    """Write the listings returned by *repository* to *output_file* as CSV.

    Each listing's ``__dict__`` becomes one row, minus the keys in
    ``DROP_COLUMNS``. Every row additionally gets:

    * ``decision`` - looked up in the decisions file by ``str(row["id"])``,
      ``None`` when there is no entry or no file;
    * each column named in ``ENSURE_COLUMNS`` (``None`` when absent);
    * ``price_per_sqm`` - ``price / square_meters`` rounded to two decimals,
      ``None`` when it cannot be computed.

    A ``square_meters`` value of ``-1`` is treated as "unknown" and replaced
    with ``None``. Rows are sorted by ascending ``price_per_sqm`` with ``None``
    values last. When there are no listings, an empty file is written.

    Args:
        repository: Source of listings; ``get_listings`` is awaited on it.
        output_file: Destination path; overwritten if it exists.
        query_parameters: Passed through to ``repository.get_listings``.
        decisions_path: JSON file mapping listing id (as a string) to a
            decision. Optional on disk; defaults to ``data/decisions.json``
            resolved against the current working directory.

    NOTE(review): file I/O here is synchronous and runs inside the coroutine.
    """
    listings = await repository.get_listings(query_parameters=query_parameters)
    rows = [
        {k: v for k, v in listing.__dict__.items() if k not in DROP_COLUMNS}
        for listing in listings
    ]

    if not rows:
        output_file.write_text("")
        return

    decisions = _load_decisions(decisions_path)

    for row in rows:
        row["decision"] = decisions.get(str(row.get("id")))

        for col in ENSURE_COLUMNS:
            row.setdefault(col, None)

        # -1 is the "unknown" sentinel for square_meters; export it as empty.
        if row.get("square_meters") == -1:
            row["square_meters"] = None

        row["price_per_sqm"] = _price_per_sqm(row.get("price"), row.get("square_meters"))

    # Tuple key: rows without a price_per_sqm sort after all rows that have one.
    rows.sort(key=lambda r: (r["price_per_sqm"] is None, r["price_per_sqm"] or 0))

    # Build the header from the union of keys in first-seen order. Rows come
    # from per-object ``__dict__``s, which need not all share the same keys;
    # taking the header from rows[0] alone makes DictWriter raise ValueError
    # on any later row that has an extra key. For homogeneous rows this yields
    # exactly the first row's key order.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    # newline="" is required by the csv module; explicit UTF-8 keeps the
    # output independent of the platform's default encoding.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
|