Replace pandas with stdlib csv, apprise with direct Slack webhook, switch to opencv-headless
- Rewrite csv_exporter.py to use stdlib csv.DictWriter instead of pandas DataFrame - Rewrite notifications.py to use aiohttp direct Slack webhook instead of apprise - Switch opencv-python to opencv-python-headless in pyproject.toml - Move httpx from dev to prod dependencies - Remove pandas and apprise from mypy ignore_missing_imports
This commit is contained in:
parent
cde3540a1e
commit
3d9550c7f1
3 changed files with 67 additions and 52 deletions
|
|
@ -1,43 +1,62 @@
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pandas as pd
|
|
||||||
from models.listing import QueryParameters
|
from models.listing import QueryParameters
|
||||||
from repositories.listing_repository import ListingRepository
|
from repositories.listing_repository import ListingRepository
|
||||||
|
|
||||||
|
|
||||||
|
DROP_COLUMNS = {"_sa_instance_state", "additional_info"}
|
||||||
|
ENSURE_COLUMNS = ("service_charge", "lease_left", "square_meters")
|
||||||
|
|
||||||
|
|
||||||
async def export_to_csv(
|
async def export_to_csv(
|
||||||
repository: ListingRepository,
|
repository: ListingRepository,
|
||||||
output_file: Path,
|
output_file: Path,
|
||||||
query_parameters: QueryParameters | None = None,
|
query_parameters: QueryParameters | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
listings = await repository.get_listings(query_parameters=query_parameters)
|
listings = await repository.get_listings(query_parameters=query_parameters)
|
||||||
ds = [listing.__dict__ for listing in listings]
|
rows = [
|
||||||
df = pd.DataFrame(ds)
|
{k: v for k, v in listing.__dict__.items() if k not in DROP_COLUMNS}
|
||||||
|
for listing in listings
|
||||||
|
]
|
||||||
|
|
||||||
# read decisions on file
|
if not rows:
|
||||||
|
output_file.write_text("")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Read decisions file if present
|
||||||
|
decisions: dict[str, str] = {}
|
||||||
decisions_path = Path("data/decisions.json")
|
decisions_path = Path("data/decisions.json")
|
||||||
if decisions_path.exists():
|
if decisions_path.exists():
|
||||||
decisions = pd.read_json(decisions_path)
|
with open(decisions_path) as f:
|
||||||
df.loc[:, "decision"] = df.id.apply(lambda x: decisions.get(x))
|
decisions = json.load(f)
|
||||||
|
|
||||||
# remove _sa_instance_state column
|
for row in rows:
|
||||||
drop_columns = ["_sa_instance_state", "additional_info"]
|
# Add decision column
|
||||||
df = df.drop(columns=drop_columns)
|
row["decision"] = decisions.get(str(row.get("id")))
|
||||||
|
|
||||||
# Ensure columns exist with NaN defaults for clean CSV output
|
# Ensure optional columns exist
|
||||||
for col in ("service_charge", "lease_left", "square_meters"):
|
for col in ENSURE_COLUMNS:
|
||||||
if col not in df.columns:
|
row.setdefault(col, None)
|
||||||
df.loc[:, col] = float("nan")
|
|
||||||
|
|
||||||
# Replace -1 sentinel values with NaN
|
# Replace -1 sentinel in square_meters
|
||||||
df.loc[:, "square_meters"] = df.square_meters.replace({-1: float("nan")})
|
if row.get("square_meters") == -1:
|
||||||
|
row["square_meters"] = None
|
||||||
|
|
||||||
# Add price per sqm column (guard against zero/missing square_meters)
|
# Compute price_per_sqm
|
||||||
df.loc[:, "price_per_sqm"] = df.apply(
|
sqm = row.get("square_meters")
|
||||||
lambda row: round(row.price / row.square_meters, 2)
|
price = row.get("price")
|
||||||
if row.square_meters and row.square_meters > 0
|
if sqm and sqm > 0 and price:
|
||||||
else None,
|
row["price_per_sqm"] = round(price / sqm, 2)
|
||||||
axis=1,
|
else:
|
||||||
)
|
row["price_per_sqm"] = None
|
||||||
|
|
||||||
df = df.sort_values(by=["price_per_sqm"], ascending=True)
|
# Sort by price_per_sqm (None values last)
|
||||||
df.to_csv(str(output_file), index=False)
|
rows.sort(key=lambda r: (r["price_per_sqm"] is None, r["price_per_sqm"] or 0))
|
||||||
|
|
||||||
|
fieldnames = list(rows[0].keys())
|
||||||
|
with open(output_file, "w", newline="") as f:
|
||||||
|
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||||
|
writer.writeheader()
|
||||||
|
writer.writerows(rows)
|
||||||
|
|
|
||||||
|
|
@ -1,29 +1,27 @@
|
||||||
from abc import abstractmethod
|
import json
|
||||||
import apprise
|
import logging
|
||||||
from functools import lru_cache
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
class Surface:
|
logger = logging.getLogger(__name__)
|
||||||
@abstractmethod
|
|
||||||
def connection_string(self) -> str | None: ...
|
|
||||||
|
|
||||||
|
|
||||||
class Slack(Surface):
|
|
||||||
def connection_string(self) -> str | None:
|
|
||||||
return os.environ.get("SLACK_WEBHOOK_URL")
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=None)
|
|
||||||
def get_notifier() -> apprise.Apprise:
|
|
||||||
surfaces = [Slack()]
|
|
||||||
obj = apprise.Apprise()
|
|
||||||
for surface in surfaces:
|
|
||||||
if conn := surface.connection_string():
|
|
||||||
obj.add(conn)
|
|
||||||
return obj
|
|
||||||
|
|
||||||
|
|
||||||
async def send_notification(body: str, title: str = "") -> bool:
|
async def send_notification(body: str, title: str = "") -> bool:
|
||||||
notifier = get_notifier()
|
webhook_url = os.environ.get("SLACK_WEBHOOK_URL")
|
||||||
return await notifier.async_notify(body=body, title=title)
|
if not webhook_url:
|
||||||
|
logger.debug("No SLACK_WEBHOOK_URL configured, skipping notification")
|
||||||
|
return False
|
||||||
|
|
||||||
|
text = f"*{title}*\n{body}" if title else body
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.post(
|
||||||
|
webhook_url,
|
||||||
|
data=json.dumps({"text": text}),
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
) as resp:
|
||||||
|
return resp.status == 200
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to send Slack notification")
|
||||||
|
return False
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,7 @@ cachetools = "^5.3.2"
|
||||||
pillow = "^10.2.0"
|
pillow = "^10.2.0"
|
||||||
numpy = "^1.26.4"
|
numpy = "^1.26.4"
|
||||||
pytesseract = "^0.3.10"
|
pytesseract = "^0.3.10"
|
||||||
pandas = "^2.2.1"
|
opencv-python-headless = "^4.11.0.86"
|
||||||
opencv-python = "^4.11.0.86"
|
|
||||||
click = "^8.2.0"
|
click = "^8.2.0"
|
||||||
aiohttp = "^3.11.18"
|
aiohttp = "^3.11.18"
|
||||||
aiohttp-socks = "^0.8.4"
|
aiohttp-socks = "^0.8.4"
|
||||||
|
|
@ -25,7 +24,6 @@ pyjwt = "^2.10.1"
|
||||||
cryptography = "^45.0.4"
|
cryptography = "^45.0.4"
|
||||||
celery = "^5.5.3"
|
celery = "^5.5.3"
|
||||||
redis = "^6.2.0"
|
redis = "^6.2.0"
|
||||||
apprise = "^1.9.3"
|
|
||||||
opentelemetry-api = "^1.36.0"
|
opentelemetry-api = "^1.36.0"
|
||||||
opentelemetry-sdk = "^1.36.0"
|
opentelemetry-sdk = "^1.36.0"
|
||||||
opentelemetry-exporter-prometheus = "^0.57b0"
|
opentelemetry-exporter-prometheus = "^0.57b0"
|
||||||
|
|
@ -33,6 +31,7 @@ opentelemetry-instrumentation-fastapi = "^0.57b0"
|
||||||
opentelemetry-instrumentation-sqlalchemy = "^0.57b0"
|
opentelemetry-instrumentation-sqlalchemy = "^0.57b0"
|
||||||
mysqlclient = "^2.2.7"
|
mysqlclient = "^2.2.7"
|
||||||
webauthn = "^2.0.0"
|
webauthn = "^2.0.0"
|
||||||
|
httpx = "^0.27.0"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
ipdb = "^0.13.13"
|
ipdb = "^0.13.13"
|
||||||
|
|
@ -41,7 +40,6 @@ podman-compose = "^1.5.0"
|
||||||
pytest = "^8.0.0"
|
pytest = "^8.0.0"
|
||||||
pytest-asyncio = "^0.23.0"
|
pytest-asyncio = "^0.23.0"
|
||||||
pytest-cov = "^4.1.0"
|
pytest-cov = "^4.1.0"
|
||||||
httpx = "^0.27.0"
|
|
||||||
aioresponses = "^0.7.6"
|
aioresponses = "^0.7.6"
|
||||||
fakeredis = "^2.21.0"
|
fakeredis = "^2.21.0"
|
||||||
pytest-xdist = "^3.5.0"
|
pytest-xdist = "^3.5.0"
|
||||||
|
|
@ -81,5 +79,5 @@ strict_optional = true
|
||||||
plugins = ["pydantic.mypy"]
|
plugins = ["pydantic.mypy"]
|
||||||
|
|
||||||
[[tool.mypy.overrides]]
|
[[tool.mypy.overrides]]
|
||||||
module = ["pytesseract.*", "cv2.*", "celery.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "pandas.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "apprise.*", "opentelemetry.*", "webauthn.*"]
|
module = ["pytesseract.*", "cv2.*", "celery.*", "aiohttp.*", "aiohttp_socks.*", "tenacity.*", "numpy.*", "PIL.*", "sqlmodel.*", "sqlalchemy.*", "alembic.*", "opentelemetry.*", "webauthn.*"]
|
||||||
ignore_missing_imports = true
|
ignore_missing_imports = true
|
||||||
Loading…
Add table
Add a link
Reference in a new issue