Refactor codebase following Clean Code principles and add 229 tests

- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
This commit is contained in:
Viktor Barzin 2026-02-07 20:19:57 +00:00
parent 7e05b3c971
commit 150342bb9e
No known key found for this signature in database
GPG key ID: 0EB088298288D958
48 changed files with 5029 additions and 990 deletions

View file

@ -1,6 +1,6 @@
from datetime import datetime, timedelta
import logging
from typing import Generator
from typing import Any, Generator
from data_access import Listing
from models.listing import (
BuyListing,
@ -12,7 +12,6 @@ from models.listing import (
)
from sqlalchemy import Engine, func, select as sa_select
from sqlmodel import Session, select
from sqlmodel.sql.expression import SelectOfScalar
from tqdm import tqdm
logger = logging.getLogger("uvicorn.error")
@ -27,8 +26,10 @@ STREAMING_COLUMNS = [
class ListingRepository:
engine: Engine
# anything more than 10k is considered buy type
buy_listing_price_threshold: int = 20_000
# Monthly rent prices in the UK are always below 20,000 GBP.
# Any listing priced at or above this threshold is treated as a purchase (buy) listing.
BUY_LISTING_PRICE_THRESHOLD: int = 20_000
def __init__(self, engine: Engine):
self.engine = engine
@ -44,24 +45,16 @@ class ListingRepository:
"""
only_ids = only_ids or []
model = RentListing # if no query params, default to renting listings
if query_parameters:
model = (
RentListing
if query_parameters.listing_type == ListingType.RENT
else BuyListing
# else RentListing
)
model = self._get_model_for_query(query_parameters)
query = select(model)
if only_ids:
query = query.where(model.id.in_(only_ids)) # type: ignore
query = self._add_where_from_query_parameters(query, model, query_parameters)
query = self._apply_query_filters(query, model, query_parameters)
if limit:
query = query.limit(limit)
with Session(self.engine) as session:
# query = select(modelListing)
rows = list(session.exec(query).all())
logging.debug(f"Found {len(rows)} listings")
return rows
@ -81,16 +74,10 @@ class ListingRepository:
limit: Maximum number of listings to yield
chunk_size: Number of rows to fetch at a time from the database
"""
model = RentListing # if no query params, default to renting listings
if query_parameters:
model = (
RentListing
if query_parameters.listing_type == ListingType.RENT
else BuyListing
)
model = self._get_model_for_query(query_parameters)
query = select(model)
query = self._add_where_from_query_parameters(query, model, query_parameters)
query = self._apply_query_filters(query, model, query_parameters)
if limit:
query = query.limit(limit)
@ -111,7 +98,7 @@ class ListingRepository:
model = self._get_model_for_query(query_parameters)
query = sa_select(func.count(model.id))
query = self._add_where_from_query_parameters_raw(query, model, query_parameters)
query = self._apply_query_filters(query, model, query_parameters)
with Session(self.engine) as session:
return session.execute(query).scalar() or 0
@ -147,7 +134,7 @@ class ListingRepository:
break
query = sa_select(*columns)
query = self._add_where_from_query_parameters_raw(
query = self._apply_query_filters(
query, model, query_parameters
)
@ -174,13 +161,25 @@ class ListingRepository:
if len(results) < page_size:
break
def _add_where_from_query_parameters_raw(
def _apply_query_filters(
self,
query,
query: Any,
model: type[RentListing] | type[BuyListing],
query_parameters: QueryParameters | None = None,
):
"""Add WHERE clauses from query parameters (for raw SQLAlchemy selects)."""
) -> Any:
"""Apply WHERE clauses from query parameters to a query.
Works with both SQLModel select() and raw SQLAlchemy sa_select() queries,
since both support the .where() interface.
Args:
query: A SQLModel or SQLAlchemy select query
model: The listing model class (RentListing or BuyListing)
query_parameters: Optional filtering parameters
Returns:
The query with WHERE clauses applied
"""
if query_parameters is None:
return query
query = query.where(
@ -207,38 +206,6 @@ class ListingRepository:
query = query.where(model.last_seen >= last_seen_threshold)
return query
def _add_where_from_query_parameters(
self,
query: SelectOfScalar[Listing],
model: type[Listing],
query_parameters: QueryParameters | None = None,
) -> SelectOfScalar[Listing]:
if query_parameters is None:
return query
query = query.where(
model.number_of_bedrooms.between(
query_parameters.min_bedrooms, query_parameters.max_bedrooms
),
model.price.between(query_parameters.min_price, query_parameters.max_price),
)
if query_parameters.min_sqm is not None:
query = query.where(model.square_meters >= query_parameters.min_sqm)
if query_parameters.furnish_types and model == RentListing:
query = query.where(model.furnish_type.in_(query_parameters.furnish_types))
if (
isinstance(model, RentListing)
and query_parameters.let_date_available_from is not None
):
query = query.where(
model.available_from >= query_parameters.let_date_available_from
)
if query_parameters.last_seen_days is not None:
last_seen_threshold = datetime.now() - timedelta(
days=query_parameters.last_seen_days
)
query = query.where(model.last_seen >= last_seen_threshold)
return query
async def upsert_listings(
self,
listings: list[modelListing],
@ -258,50 +225,74 @@ class ListingRepository:
self,
listings: list[Listing],
) -> list[modelListing]:
"""
Upsert listings into the database.
"""Upsert legacy Listing objects into the database.
.. deprecated::
This method converts legacy data_access.Listing objects to SQLModel
entities. Use upsert_listings() with RentListing/BuyListing directly.
Legacy Listing objects from filesystem-based data may contain malformed
or incomplete data, so conversion errors are logged and skipped rather
than aborting the entire batch.
"""
models = []
failed_to_upsert = []
with Session(self.engine) as session:
for listing in tqdm(listings, desc="Upserting listings"):
# Convert Listing to modelListing
# Convert legacy Listing to the appropriate SQLModel entity
try:
model_listing = await self._get_concrete_listing(listing)
except Exception as e: # WHY SO MANY ERORRS??
# If for whatever reason we cannot add listing, ignore and retry
print(f"Error converting listing {listing.identifier}: {e}")
except Exception as e:
# Legacy Listing -> model conversion may fail for malformed data
# (e.g. missing required fields, invalid types). Log and skip.
logger.error(f"Error converting listing {listing.identifier}: {e}")
failed_to_upsert.append(listing)
continue
session.merge(model_listing)
models.append(model_listing)
session.commit()
print(f"Failed to upsert {len(failed_to_upsert)} listings.")
if failed_to_upsert:
logger.warning(f"Failed to upsert {len(failed_to_upsert)} listings.")
return models
@staticmethod
def _parse_furnish_type(listing: Listing) -> FurnishType:
"""Extract and normalize the furnish type from a legacy Listing's detail object.
Handles missing/null detailobject, missing property key, missing or null
letFurnishType value, and normalizes "landlord" variants to ASK_LANDLORD.
Args:
listing: A legacy data_access.Listing object
Returns:
The parsed FurnishType enum value
"""
if (
listing.detailobject is None
or listing.detailobject.get("property") is None
or listing.detailobject["property"].get("letFurnishType") is None
):
return FurnishType.UNKNOWN
furnish_type_str = listing.detailobject["property"]["letFurnishType"]
if furnish_type_str is None:
return FurnishType.UNKNOWN
elif "landlord" in furnish_type_str.lower():
furnish_type_str = "ask landlord"
else:
furnish_type_str = furnish_type_str.lower()
return FurnishType(furnish_type_str)
async def _get_concrete_listing(
self,
listing: Listing,
) -> modelListing:
now = datetime.now()
furnish_type = self._parse_furnish_type(listing)
if (
listing.detailobject is None
or listing.detailobject.get("property") is None
or listing.detailobject["property"].get("letFurnishType") is None
):
furnish_type_str = "unknown"
else:
furnish_type_str = listing.detailobject["property"]["letFurnishType"]
if furnish_type_str is None:
furnish_type_str = "unknown"
elif "landlord" in furnish_type_str.lower():
furnish_type_str = "ask landlord"
else:
furnish_type_str = furnish_type_str.lower()
furnish_type = FurnishType(furnish_type_str)
if listing.price < self.buy_listing_price_threshold:
if listing.price < self.BUY_LISTING_PRICE_THRESHOLD:
model_listing = RentListing(
id=listing.identifier,
price=listing.price,