From 3d9550c7f172aad054f7e6fc6cb39ab88de6e891 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 21 Feb 2026 19:47:10 +0000 Subject: [PATCH] Replace pandas with stdlib csv, apprise with direct Slack webhook, switch to opencv-headless - Rewrite csv_exporter.py to use stdlib csv.DictWriter instead of pandas DataFrame - Rewrite notifications.py to use aiohttp direct Slack webhook instead of apprise - Switch opencv-python to opencv-python-headless in pyproject.toml - Move httpx from dev to prod dependencies - Remove pandas and apprise from mypy ignore_missing_imports --- csv_exporter.py | 67 +++++++++++++++++++++++++++++++----------------- notifications.py | 44 +++++++++++++++---------------- pyproject.toml | 8 +++--- 3 files changed, 67 insertions(+), 52 deletions(-) diff --git a/csv_exporter.py b/csv_exporter.py index f3ca057..a75d73d 100644 --- a/csv_exporter.py +++ b/csv_exporter.py @@ -1,43 +1,62 @@ +import csv +import json from pathlib import Path -import pandas as pd + from models.listing import QueryParameters from repositories.listing_repository import ListingRepository +DROP_COLUMNS = {"_sa_instance_state", "additional_info"} +ENSURE_COLUMNS = ("service_charge", "lease_left", "square_meters") + + async def export_to_csv( repository: ListingRepository, output_file: Path, query_parameters: QueryParameters | None = None, ) -> None: listings = await repository.get_listings(query_parameters=query_parameters) - ds = [listing.__dict__ for listing in listings] - df = pd.DataFrame(ds) + rows = [ + {k: v for k, v in listing.__dict__.items() if k not in DROP_COLUMNS} + for listing in listings + ] - # read decisions on file + if not rows: + output_file.write_text("") + return + + # Read decisions file if present + decisions: dict[str, str] = {} decisions_path = Path("data/decisions.json") if decisions_path.exists(): - decisions = pd.read_json(decisions_path) - df.loc[:, "decision"] = df.id.apply(lambda x: decisions.get(x)) + with open(decisions_path) as f: + decisions = json.load(f) - # remove _sa_instance_state column - drop_columns = ["_sa_instance_state", "additional_info"] - df = df.drop(columns=drop_columns) + for row in rows: + # Add decision column + row["decision"] = decisions.get(str(row.get("id"))) - # Ensure columns exist with NaN defaults for clean CSV output - for col in ("service_charge", "lease_left", "square_meters"): - if col not in df.columns: - df.loc[:, col] = float("nan") + # Ensure optional columns exist + for col in ENSURE_COLUMNS: + row.setdefault(col, None) - # Replace -1 sentinel values with NaN - df.loc[:, "square_meters"] = df.square_meters.replace({-1: float("nan")}) + # Replace -1 sentinel in square_meters + if row.get("square_meters") == -1: + row["square_meters"] = None - # Add price per sqm column (guard against zero/missing square_meters) - df.loc[:, "price_per_sqm"] = df.apply( - lambda row: round(row.price / row.square_meters, 2) - if row.square_meters and row.square_meters > 0 - else None, - axis=1, - ) + # Compute price_per_sqm + sqm = row.get("square_meters") + price = row.get("price") + if sqm and sqm > 0 and price: + row["price_per_sqm"] = round(price / sqm, 2) + else: + row["price_per_sqm"] = None - df = df.sort_values(by=["price_per_sqm"], ascending=True) - df.to_csv(str(output_file), index=False) + # Sort by price_per_sqm (None values last) + rows.sort(key=lambda r: (r["price_per_sqm"] is None, r["price_per_sqm"] or 0)) + + fieldnames = list(rows[0].keys()) + with open(output_file, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) diff --git a/notifications.py b/notifications.py index 3fec5ef..9691f09 100644 --- a/notifications.py +++ b/notifications.py @@ -1,29 +1,27 @@ -from abc import abstractmethod -import apprise -from functools import lru_cache +import json +import logging import os +import aiohttp -class Surface: - @abstractmethod - def connection_string(self) -> str | None: ... - - -class Slack(Surface): - def connection_string(self) -> str | None: - return os.environ.get("SLACK_WEBHOOK_URL") - - -@lru_cache(maxsize=None) -def get_notifier() -> apprise.Apprise: - surfaces = [Slack()] - obj = apprise.Apprise() - for surface in surfaces: - if conn := surface.connection_string(): - obj.add(conn) - return obj +logger = logging.getLogger(__name__) async def send_notification(body: str, title: str = "") -> bool: - notifier = get_notifier() - return await notifier.async_notify(body=body, title=title) + webhook_url = os.environ.get("SLACK_WEBHOOK_URL") + if not webhook_url: + logger.debug("No SLACK_WEBHOOK_URL configured, skipping notification") + return False + + text = f"*{title}*\n{body}" if title else body + try: + async with aiohttp.ClientSession() as session: + async with session.post( + webhook_url, + data=json.dumps({"text": text}), + headers={"Content-Type": "application/json"}, + ) as resp: + return resp.status == 200 + except Exception: + logger.exception("Failed to send Slack notification") + return False diff --git a/pyproject.toml b/pyproject.toml index 29e3bb4..6d4f8f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,8 +11,7 @@ cachetools = "^5.3.2" pillow = "^10.2.0" numpy = "^1.26.4" pytesseract = "^0.3.10" -pandas = "^2.2.1" -opencv-python = "^4.11.0.86" +opencv-python-headless = "^4.11.0.86" click = "^8.2.0" aiohttp = "^3.11.18" aiohttp-socks = "^0.8.4" @@ -25,7 +24,6 @@ pyjwt = "^2.10.1" cryptography = "^45.0.4" celery = "^5.5.3" redis = "^6.2.0" -apprise = "^1.9.3" opentelemetry-api = "^1.36.0" opentelemetry-sdk = "^1.36.0" opentelemetry-exporter-prometheus = "^0.57b0" @@ -33,6 +31,7 @@ opentelemetry-instrumentation-fastapi = "^0.57b0" opentelemetry-instrumentation-sqlalchemy = "^0.57b0" mysqlclient = "^2.2.7" webauthn = "^2.0.0" +httpx = "^0.27.0" [tool.poetry.group.dev.dependencies] ipdb = "^0.13.13" @@ -41,7 +40,6 @@ podman-compose = "^1.5.0" pytest = "^8.0.0" pytest-asyncio = "^0.23.0" pytest-cov = "^4.1.0" -httpx = "^0.27.0" aioresponses = "^0.7.6" fakeredis = "^2.21.0" pytest-xdist = "^3.5.0" @@ -81,5 +79,5 @@ strict_optional = true plugins = ["pydantic.mypy"] [[tool.mypy.overrides]] -module = ["pytesseract.*", "cv2.*", "celery.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "pandas.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "apprise.*", "opentelemetry.*", "webauthn.*"] +module = ["pytesseract.*", "cv2.*", "celery.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "opentelemetry.*", "webauthn.*"] ignore_missing_imports = true \ No newline at end of file