Refactor codebase following Clean Code principles and add 229 tests

- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
This commit is contained in:
Viktor Barzin 2026-02-07 20:19:57 +00:00
parent 7e05b3c971
commit 150342bb9e
No known key found for this signature in database
GPG key ID: 0EB088298288D958
48 changed files with 5029 additions and 990 deletions

View file

@ -5,7 +5,7 @@ from datetime import datetime, timedelta
import enum
import json
from typing import Any, Dict, List
from pydantic import BaseModel, Field as PydanticField
from pydantic import BaseModel, Field as PydanticField, model_validator
from rec import routing
from sqlmodel import JSON, TEXT, SQLModel, Field
@ -52,6 +52,21 @@ class ListingSite(enum.StrEnum):
# ... add more
def _parse_price_history(price_history_json: str) -> list[PriceHistoryItem]:
    """Decode a serialized price history into PriceHistoryItem objects.

    A falsy input (for example an empty string) produces an empty list.
    Otherwise the input is parsed as JSON and each entry must provide
    ``first_seen`` and ``last_seen`` strings accepted by
    ``datetime.fromisoformat`` together with a ``price`` value.
    """
    if not price_history_json:
        return []

    history: list[PriceHistoryItem] = []
    for entry in json.loads(str(price_history_json)):
        # Timestamps are stored as strings; convert them back to datetimes.
        first_seen = datetime.fromisoformat(entry["first_seen"])
        last_seen = datetime.fromisoformat(entry["last_seen"])
        history.append(
            PriceHistoryItem(
                first_seen=first_seen,
                last_seen=last_seen,
                price=entry["price"],
            )
        )
    return history
class Listing(SQLModel, table=False):
id: int = Field(primary_key=True)
price: float = Field(nullable=False, index=True)
@ -61,7 +76,6 @@ class Listing(SQLModel, table=False):
council_tax_band: str | None = Field(default=None, nullable=True)
longitude: float = Field(nullable=False)
latitude: float = Field(nullable=False)
# price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
price_history_json: str = Field(sa_type=TEXT)
listing_site: ListingSite = Field(nullable=False)
last_seen: datetime = Field(
@ -103,20 +117,7 @@ class Listing(SQLModel, table=False):
"""
Returns a list of PriceHistoryItem objects from the price_history_json.
"""
if not self.price_history_json:
return []
parsed: list = json.loads(str(self.price_history_json))
for item in parsed:
item["first_seen"] = datetime.fromisoformat(item["first_seen"])
item["last_seen"] = datetime.fromisoformat(item["last_seen"])
return [
PriceHistoryItem(
first_seen=item["first_seen"],
last_seen=item["last_seen"],
price=item["price"],
)
for item in parsed
]
return _parse_price_history(self.price_history_json)
@staticmethod
def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
@ -142,36 +143,8 @@ class Listing(SQLModel, table=False):
"""
if not self.routing_info_json:
return {}
# TODO: move to a separate serializer class
json_data = json.loads(self.routing_info_json)
destimation_routes = {}
for destination_mode_str, routes_json in json_data.items():
destination_mode = DestinationMode(
destination_address=json.loads(destination_mode_str)[
"destination_address"
],
travel_mode=routing.TravelMode(
json.loads(destination_mode_str)["travel_mode"]
),
)
parsed_route = json.loads(routes_json[0])
routes = [
Route(
legs=[
RouteLegStep(
distance_meters=step["distance_meters"],
duration_s=step["duration_s"],
travel_mode=routing.TravelMode(step["travel_mode"]),
)
for step in parsed_route["legs"]
],
distance_meters=parsed_route["distance_meters"],
duration_s=int(parsed_route["duration_s"]),
)
]
destimation_routes[destination_mode] = routes
return destimation_routes
from rec.route_serializer import RouteSerializer
return RouteSerializer.deserialize(self.routing_info_json)
def serialize_routing_info(
self, routing_info: dict[DestinationMode, list[Route]]
@ -179,17 +152,8 @@ class Listing(SQLModel, table=False):
"""
Serializes the routing_info to a JSON string.
"""
# TODO: move to a separate serializer class
# for destination_mode, routes in routing_info.items():
serialized = json.dumps(
{
json.dumps(dataclasses.asdict(destination_mode)): [
json.dumps(dataclasses.asdict(route)) for route in routes
]
for destination_mode, routes in routing_info.items()
}
)
return serialized
from rec.route_serializer import RouteSerializer
return RouteSerializer.serialize(routing_info)
class FurnishType(enum.StrEnum):
@ -224,9 +188,9 @@ class DestinationMode:
# This allows serializers to pick up a dict representation
return asdict(self)
def __iter__(self):
    """Iterate over the ``(field_name, value)`` pairs of ``asdict(self)``.

    This lets code that expects a mapping-like iterable (for example
    ``dict(instance)``) consume the object directly.
    """
    field_pairs = asdict(self).items()
    return iter(field_pairs)
def to_dict(self) -> dict[str, Any]:
    """Build a dictionary of this DestinationMode's fields using ``asdict``."""
    field_values: dict[str, Any] = asdict(self)
    return field_values
class ListingType(enum.StrEnum):
@ -254,3 +218,23 @@ class QueryParameters(BaseModel):
let_date_available_from: datetime | None = None
last_seen_days: int | None = None
min_sqm: int | None = None
@model_validator(mode="after")
def _validate_ranges(self) -> QueryParameters:
    """Reject inconsistent price and bedroom bounds.

    The checks run in a fixed order and stop at the first violation:
    min_price above max_price, negative min_bedrooms, negative
    max_bedrooms, then min_bedrooms above max_bedrooms.

    Returns:
        The instance itself when every check passes.

    Raises:
        ValueError: describing the first violated constraint.
    """
    problem: str | None = None
    if self.min_price > self.max_price:
        problem = f"min_price ({self.min_price}) must be <= max_price ({self.max_price})"
    elif self.min_bedrooms < 0:
        problem = f"min_bedrooms ({self.min_bedrooms}) must be non-negative"
    elif self.max_bedrooms < 0:
        problem = f"max_bedrooms ({self.max_bedrooms}) must be non-negative"
    elif self.min_bedrooms > self.max_bedrooms:
        problem = f"min_bedrooms ({self.min_bedrooms}) must be <= max_bedrooms ({self.max_bedrooms})"

    if problem is not None:
        raise ValueError(problem)
    return self