From 3d9550c7f172aad054f7e6fc6cb39ab88de6e891 Mon Sep 17 00:00:00 2001
From: Viktor Barzin <viktorbarzin@meta.com>
Date: Sat, 21 Feb 2026 19:47:10 +0000
Subject: [PATCH] Replace pandas with stdlib csv, apprise with direct Slack
 webhook, switch to opencv-headless

- Rewrite csv_exporter.py to use stdlib csv.DictWriter instead of pandas DataFrame
- Rewrite notifications.py to use aiohttp direct Slack webhook instead of apprise
- Switch opencv-python to opencv-python-headless in pyproject.toml
- Move httpx from dev to prod dependencies
- Remove pandas and apprise from mypy ignore_missing_imports
---
 csv_exporter.py  | 67 +++++++++++++++++++++++++++++++-----------------
 notifications.py | 44 +++++++++++++++----------------
 pyproject.toml   |  8 +++---
 3 files changed, 67 insertions(+), 52 deletions(-)

diff --git a/csv_exporter.py b/csv_exporter.py
index f3ca057..a75d73d 100644
--- a/csv_exporter.py
+++ b/csv_exporter.py
@@ -1,43 +1,62 @@
+import csv
+import json
 from pathlib import Path
-import pandas as pd
+
 from models.listing import QueryParameters
 from repositories.listing_repository import ListingRepository
 
 
+DROP_COLUMNS = {"_sa_instance_state", "additional_info"}
+ENSURE_COLUMNS = ("service_charge", "lease_left", "square_meters")
+
+
 async def export_to_csv(
     repository: ListingRepository,
     output_file: Path,
     query_parameters: QueryParameters | None = None,
 ) -> None:
     listings = await repository.get_listings(query_parameters=query_parameters)
-    ds = [listing.__dict__ for listing in listings]
-    df = pd.DataFrame(ds)
+    rows = [
+        {k: v for k, v in listing.__dict__.items() if k not in DROP_COLUMNS}
+        for listing in listings
+    ]
 
-    # read decisions on file
+    if not rows:
+        output_file.write_text("")
+        return
+
+    # Read decisions file if present
+    decisions: dict[str, str] = {}
     decisions_path = Path("data/decisions.json")
     if decisions_path.exists():
-        decisions = pd.read_json(decisions_path)
-        df.loc[:, "decision"] = df.id.apply(lambda x: decisions.get(x))
+        with open(decisions_path) as f:
+            decisions = json.load(f)
 
-    # remove _sa_instance_state column
-    drop_columns = ["_sa_instance_state", "additional_info"]
-    df = df.drop(columns=drop_columns)
+    for row in rows:
+        # Add decision column
+        row["decision"] = decisions.get(str(row.get("id")))
 
-    # Ensure columns exist with NaN defaults for clean CSV output
-    for col in ("service_charge", "lease_left", "square_meters"):
-        if col not in df.columns:
-            df.loc[:, col] = float("nan")
+        # Ensure optional columns exist
+        for col in ENSURE_COLUMNS:
+            row.setdefault(col, None)
 
-    # Replace -1 sentinel values with NaN
-    df.loc[:, "square_meters"] = df.square_meters.replace({-1: float("nan")})
+        # Replace -1 sentinel in square_meters
+        if row.get("square_meters") == -1:
+            row["square_meters"] = None
 
-    # Add price per sqm column (guard against zero/missing square_meters)
-    df.loc[:, "price_per_sqm"] = df.apply(
-        lambda row: round(row.price / row.square_meters, 2)
-        if row.square_meters and row.square_meters > 0
-        else None,
-        axis=1,
-    )
+        # Compute price_per_sqm
+        sqm = row.get("square_meters")
+        price = row.get("price")
+        if sqm and sqm > 0 and price:
+            row["price_per_sqm"] = round(price / sqm, 2)
+        else:
+            row["price_per_sqm"] = None
 
-    df = df.sort_values(by=["price_per_sqm"], ascending=True)
-    df.to_csv(str(output_file), index=False)
+    # Sort by price_per_sqm (None values last)
+    rows.sort(key=lambda r: (r["price_per_sqm"] is None, r["price_per_sqm"] or 0))
+
+    fieldnames = list(rows[0].keys())
+    with open(output_file, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
diff --git a/notifications.py b/notifications.py
index 3fec5ef..9691f09 100644
--- a/notifications.py
+++ b/notifications.py
@@ -1,29 +1,27 @@
-from abc import abstractmethod
-import apprise
-from functools import lru_cache
+import json
+import logging
 import os
 
+import aiohttp
 
-class Surface:
-    @abstractmethod
-    def connection_string(self) -> str | None: ...
-
-
-class Slack(Surface):
-    def connection_string(self) -> str | None:
-        return os.environ.get("SLACK_WEBHOOK_URL")
-
-
-@lru_cache(maxsize=None)
-def get_notifier() -> apprise.Apprise:
-    surfaces = [Slack()]
-    obj = apprise.Apprise()
-    for surface in surfaces:
-        if conn := surface.connection_string():
-            obj.add(conn)
-    return obj
+logger = logging.getLogger(__name__)
 
 
 async def send_notification(body: str, title: str = "") -> bool:
-    notifier = get_notifier()
-    return await notifier.async_notify(body=body, title=title)
+    webhook_url = os.environ.get("SLACK_WEBHOOK_URL")
+    if not webhook_url:
+        logger.debug("No SLACK_WEBHOOK_URL configured, skipping notification")
+        return False
+
+    text = f"*{title}*\n{body}" if title else body
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                webhook_url,
+                data=json.dumps({"text": text}),
+                headers={"Content-Type": "application/json"},
+            ) as resp:
+                return resp.status == 200
+    except Exception:
+        logger.exception("Failed to send Slack notification")
+        return False
diff --git a/pyproject.toml b/pyproject.toml
index 29e3bb4..6d4f8f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,8 +11,7 @@ cachetools = "^5.3.2"
 pillow = "^10.2.0"
 numpy = "^1.26.4"
 pytesseract = "^0.3.10"
-pandas = "^2.2.1"
-opencv-python = "^4.11.0.86"
+opencv-python-headless = "^4.11.0.86"
 click = "^8.2.0"
 aiohttp = "^3.11.18"
 aiohttp-socks = "^0.8.4"
@@ -25,7 +24,6 @@ pyjwt = "^2.10.1"
 cryptography = "^45.0.4"
 celery = "^5.5.3"
 redis = "^6.2.0"
-apprise = "^1.9.3"
 opentelemetry-api = "^1.36.0"
 opentelemetry-sdk = "^1.36.0"
 opentelemetry-exporter-prometheus = "^0.57b0"
@@ -33,6 +31,7 @@ opentelemetry-instrumentation-fastapi = "^0.57b0"
 opentelemetry-instrumentation-sqlalchemy = "^0.57b0"
 mysqlclient = "^2.2.7"
 webauthn = "^2.0.0"
+httpx = "^0.27.0"
 
 [tool.poetry.group.dev.dependencies]
 ipdb = "^0.13.13"
@@ -41,7 +40,6 @@ podman-compose = "^1.5.0"
 pytest = "^8.0.0"
 pytest-asyncio = "^0.23.0"
 pytest-cov = "^4.1.0"
-httpx = "^0.27.0"
 aioresponses = "^0.7.6"
 fakeredis = "^2.21.0"
 pytest-xdist = "^3.5.0"
@@ -81,5 +79,5 @@ strict_optional = true
 plugins = ["pydantic.mypy"]
 
 [[tool.mypy.overrides]]
-module = ["pytesseract.*", "cv2.*", "celery.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "pandas.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "apprise.*", "opentelemetry.*", "webauthn.*"]
+module = ["pytesseract.*", "cv2.*", "celery.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "opentelemetry.*", "webauthn.*"]
 ignore_missing_imports = true
\ No newline at end of file