Replace pandas with stdlib csv, apprise with direct Slack webhook, switch to opencv-python-headless

- Rewrite csv_exporter.py to use stdlib csv.DictWriter instead of pandas DataFrame
- Rewrite notifications.py to post directly to the Slack webhook via aiohttp instead of using apprise
- Switch opencv-python to opencv-python-headless in pyproject.toml
- Move httpx from dev to prod dependencies
- Remove pandas and apprise from mypy ignore_missing_imports
This commit is contained in:
Viktor Barzin 2026-02-21 19:47:10 +00:00
parent cde3540a1e
commit 3d9550c7f1
No known key found for this signature in database
GPG key ID: 0EB088298288D958
3 changed files with 67 additions and 52 deletions

View file

@@ -1,43 +1,62 @@
import csv
import json
from pathlib import Path from pathlib import Path
import pandas as pd
from models.listing import QueryParameters from models.listing import QueryParameters
from repositories.listing_repository import ListingRepository from repositories.listing_repository import ListingRepository
DROP_COLUMNS = {"_sa_instance_state", "additional_info"}
ENSURE_COLUMNS = ("service_charge", "lease_left", "square_meters")
async def export_to_csv( async def export_to_csv(
repository: ListingRepository, repository: ListingRepository,
output_file: Path, output_file: Path,
query_parameters: QueryParameters | None = None, query_parameters: QueryParameters | None = None,
) -> None: ) -> None:
listings = await repository.get_listings(query_parameters=query_parameters) listings = await repository.get_listings(query_parameters=query_parameters)
ds = [listing.__dict__ for listing in listings] rows = [
df = pd.DataFrame(ds) {k: v for k, v in listing.__dict__.items() if k not in DROP_COLUMNS}
for listing in listings
]
# read decisions on file if not rows:
output_file.write_text("")
return
# Read decisions file if present
decisions: dict[str, str] = {}
decisions_path = Path("data/decisions.json") decisions_path = Path("data/decisions.json")
if decisions_path.exists(): if decisions_path.exists():
decisions = pd.read_json(decisions_path) with open(decisions_path) as f:
df.loc[:, "decision"] = df.id.apply(lambda x: decisions.get(x)) decisions = json.load(f)
# remove _sa_instance_state column for row in rows:
drop_columns = ["_sa_instance_state", "additional_info"] # Add decision column
df = df.drop(columns=drop_columns) row["decision"] = decisions.get(str(row.get("id")))
# Ensure columns exist with NaN defaults for clean CSV output # Ensure optional columns exist
for col in ("service_charge", "lease_left", "square_meters"): for col in ENSURE_COLUMNS:
if col not in df.columns: row.setdefault(col, None)
df.loc[:, col] = float("nan")
# Replace -1 sentinel values with NaN # Replace -1 sentinel in square_meters
df.loc[:, "square_meters"] = df.square_meters.replace({-1: float("nan")}) if row.get("square_meters") == -1:
row["square_meters"] = None
# Add price per sqm column (guard against zero/missing square_meters) # Compute price_per_sqm
df.loc[:, "price_per_sqm"] = df.apply( sqm = row.get("square_meters")
lambda row: round(row.price / row.square_meters, 2) price = row.get("price")
if row.square_meters and row.square_meters > 0 if sqm and sqm > 0 and price:
else None, row["price_per_sqm"] = round(price / sqm, 2)
axis=1, else:
) row["price_per_sqm"] = None
df = df.sort_values(by=["price_per_sqm"], ascending=True) # Sort by price_per_sqm (None values last)
df.to_csv(str(output_file), index=False) rows.sort(key=lambda r: (r["price_per_sqm"] is None, r["price_per_sqm"] or 0))
fieldnames = list(rows[0].keys())
with open(output_file, "w", newline="") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(rows)

View file

@@ -1,29 +1,27 @@
from abc import abstractmethod import json
import apprise import logging
from functools import lru_cache
import os import os
import aiohttp
class Surface: logger = logging.getLogger(__name__)
@abstractmethod
def connection_string(self) -> str | None: ...
class Slack(Surface):
def connection_string(self) -> str | None:
return os.environ.get("SLACK_WEBHOOK_URL")
@lru_cache(maxsize=None)
def get_notifier() -> apprise.Apprise:
surfaces = [Slack()]
obj = apprise.Apprise()
for surface in surfaces:
if conn := surface.connection_string():
obj.add(conn)
return obj
async def send_notification(body: str, title: str = "") -> bool: async def send_notification(body: str, title: str = "") -> bool:
notifier = get_notifier() webhook_url = os.environ.get("SLACK_WEBHOOK_URL")
return await notifier.async_notify(body=body, title=title) if not webhook_url:
logger.debug("No SLACK_WEBHOOK_URL configured, skipping notification")
return False
text = f"*{title}*\n{body}" if title else body
try:
async with aiohttp.ClientSession() as session:
async with session.post(
webhook_url,
data=json.dumps({"text": text}),
headers={"Content-Type": "application/json"},
) as resp:
return resp.status == 200
except Exception:
logger.exception("Failed to send Slack notification")
return False

View file

@@ -11,8 +11,7 @@ cachetools = "^5.3.2"
pillow = "^10.2.0" pillow = "^10.2.0"
numpy = "^1.26.4" numpy = "^1.26.4"
pytesseract = "^0.3.10" pytesseract = "^0.3.10"
pandas = "^2.2.1" opencv-python-headless = "^4.11.0.86"
opencv-python = "^4.11.0.86"
click = "^8.2.0" click = "^8.2.0"
aiohttp = "^3.11.18" aiohttp = "^3.11.18"
aiohttp-socks = "^0.8.4" aiohttp-socks = "^0.8.4"
@@ -25,7 +24,6 @@ pyjwt = "^2.10.1"
cryptography = "^45.0.4" cryptography = "^45.0.4"
celery = "^5.5.3" celery = "^5.5.3"
redis = "^6.2.0" redis = "^6.2.0"
apprise = "^1.9.3"
opentelemetry-api = "^1.36.0" opentelemetry-api = "^1.36.0"
opentelemetry-sdk = "^1.36.0" opentelemetry-sdk = "^1.36.0"
opentelemetry-exporter-prometheus = "^0.57b0" opentelemetry-exporter-prometheus = "^0.57b0"
@@ -33,6 +31,7 @@ opentelemetry-instrumentation-fastapi = "^0.57b0"
opentelemetry-instrumentation-sqlalchemy = "^0.57b0" opentelemetry-instrumentation-sqlalchemy = "^0.57b0"
mysqlclient = "^2.2.7" mysqlclient = "^2.2.7"
webauthn = "^2.0.0" webauthn = "^2.0.0"
httpx = "^0.27.0"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
ipdb = "^0.13.13" ipdb = "^0.13.13"
@@ -41,7 +40,6 @@ podman-compose = "^1.5.0"
pytest = "^8.0.0" pytest = "^8.0.0"
pytest-asyncio = "^0.23.0" pytest-asyncio = "^0.23.0"
pytest-cov = "^4.1.0" pytest-cov = "^4.1.0"
httpx = "^0.27.0"
aioresponses = "^0.7.6" aioresponses = "^0.7.6"
fakeredis = "^2.21.0" fakeredis = "^2.21.0"
pytest-xdist = "^3.5.0" pytest-xdist = "^3.5.0"
@@ -81,5 +79,5 @@ strict_optional = true
plugins = ["pydantic.mypy"] plugins = ["pydantic.mypy"]
[[tool.mypy.overrides]] [[tool.mypy.overrides]]
module = ["pytesseract.*", "cv2.*", "celery.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "pandas.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "apprise.*", "opentelemetry.*", "webauthn.*"] module = ["pytesseract.*", "cv2.*", "celery.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "opentelemetry.*", "webauthn.*"]
ignore_missing_imports = true ignore_missing_imports = true