Refactor codebase following Clean Code principles and add 229 tests

- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
This commit is contained in:
Viktor Barzin 2026-02-07 20:19:57 +00:00
parent 7e05b3c971
commit 150342bb9e
No known key found for this signature in database
GPG key ID: 0EB088298288D958
48 changed files with 5029 additions and 990 deletions

View file

@ -22,13 +22,50 @@ P = ParamSpec("P")
R = TypeVar("R")
def build_query_parameters(
    type: str,
    district: list[str] | tuple[str, ...] | None,
    min_bedrooms: int,
    max_bedrooms: int,
    min_price: int,
    max_price: int,
    furnish_types: list[str] | tuple[str, ...],
    available_from: datetime | None,
    last_seen_days: int,
    min_sqm: int | None = None,
    radius: int = 0,
    page_size: int = 500,
    max_days_since_added: int = 14,
) -> QueryParameters:
    """Translate raw CLI option values into a QueryParameters object.

    ``type`` and every entry of ``furnish_types`` are resolved by member
    name through ``ListingType`` and ``FurnishType``; an unknown name
    surfaces as the lookup's own error. ``district`` is collapsed into a
    set, which is empty when no district was supplied. An empty or missing
    ``furnish_types`` becomes ``None``. All remaining arguments are
    forwarded unchanged.
    """
    # Resolve the listing type first so a bad name fails before anything else.
    listing_type = ListingType[type]

    # A falsy district (None, empty list/tuple) yields an empty set.
    district_names = set(district or ())

    if furnish_types:
        furnish_filter = [FurnishType[name] for name in furnish_types]
    else:
        furnish_filter = None

    return QueryParameters(
        listing_type=listing_type,
        district_names=district_names,
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
        min_price=min_price,
        max_price=max_price,
        furnish_types=furnish_filter,
        let_date_available_from=available_from,
        last_seen_days=last_seen_days,
        min_sqm=min_sqm,
        radius=radius,
        page_size=page_size,
        max_days_since_added=max_days_since_added,
    )
def listing_filter_options(func: Callable[P, R]) -> Callable[P, R]:
"""Decorator to add common options for filtering listings."""
"""Decorator that adds common listing filter options and builds QueryParameters.
The wrapped function receives a `query_parameters: QueryParameters` kwarg
instead of individual filter values.
"""
@click.option(
"--type",
"-t",
help="Type of listing to scrape",
help="Type of listing to scrape (BUY or RENT)",
type=click.Choice(
ListingType.__members__.keys(),
case_sensitive=False,
@ -50,26 +87,26 @@ def listing_filter_options(func: Callable[P, R]) -> Callable[P, R]:
@click.option(
"--min-price",
default=0,
help="Minimum price",
help="Minimum price in GBP",
type=click.IntRange(min=0),
)
@click.option(
"--max-price",
default=999_999,
help="Maximum price",
help="Maximum price in GBP",
type=click.IntRange(min=0),
)
@click.option(
"--district",
default=None,
help="Districts to scrape",
help="District to filter by (can be repeated for multiple districts)",
type=click.Choice(district_service.get_district_names(), case_sensitive=False),
multiple=True,
)
@click.option(
"--furnish-types",
"-f",
help="Furnish types for rented listings",
help="Furnish type filter for rented listings (can be repeated)",
type=click.Choice(
[furnish_type.name for furnish_type in FurnishType.__members__.values()],
case_sensitive=False,
@ -78,13 +115,13 @@ def listing_filter_options(func: Callable[P, R]) -> Callable[P, R]:
)
@click.option(
"--available-from",
help="Let date available from",
help="Only include listings available from this date (format: YYYY-MM-DD)",
default=None,
type=click.DateTime(),
)
@click.option(
"--last-seen-days",
help="Last seen (days). If set, only listings that were seen in the last N days will be included.",
help="Only include listings seen in the last N days",
default=14,
type=int,
)
@ -95,45 +132,37 @@ def listing_filter_options(func: Callable[P, R]) -> Callable[P, R]:
type=int,
)
@wraps(func)
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
return func(*args, **kwargs)
def wrapper(
*args: P.args,
type: str,
district: tuple[str, ...],
min_bedrooms: int,
max_bedrooms: int,
min_price: int,
max_price: int,
furnish_types: tuple[str, ...],
available_from: datetime | None,
last_seen_days: int,
min_sqm: int | None,
**kwargs: P.kwargs,
) -> R:
query_parameters = build_query_parameters(
type=type,
district=district,
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
min_price=min_price,
max_price=max_price,
furnish_types=furnish_types,
available_from=available_from,
last_seen_days=last_seen_days,
min_sqm=min_sqm,
)
return func(*args, query_parameters=query_parameters, **kwargs)
return wrapper
def build_query_parameters(
    type: str,
    district: list[str],
    min_bedrooms: int,
    max_bedrooms: int,
    min_price: int,
    max_price: int,
    furnish_types: list[str],
    available_from: datetime | None,
    last_seen_days: int,
    min_sqm: int | None = None,
    radius: int = 0,
    page_size: int = 500,
    max_days_since_added: int = 14,
) -> QueryParameters:
    """Assemble a QueryParameters instance from CLI option values.

    The listing type and each furnish type are looked up by member name in
    ``ListingType`` / ``FurnishType``. ``district`` is turned into a set;
    when it is empty or missing, ``None`` is passed instead. Likewise an
    empty ``furnish_types`` is passed on as ``None``. Every other argument
    is handed to ``QueryParameters`` as given.
    """
    # Enum lookup happens before any other conversion, as in the one-expression form.
    resolved_type = ListingType[type]

    selected_districts = None
    if district:
        selected_districts = set(district)

    parsed_furnish_types = None
    if furnish_types:
        parsed_furnish_types = [FurnishType[ft_name] for ft_name in furnish_types]

    return QueryParameters(
        listing_type=resolved_type,
        district_names=selected_districts,
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
        min_price=min_price,
        max_price=max_price,
        furnish_types=parsed_furnish_types,
        let_date_available_from=available_from,
        last_seen_days=last_seen_days,
        min_sqm=min_sqm,
        radius=radius,
        page_size=page_size,
        max_days_since_added=max_days_since_added,
    )
@click.group()
@click.option(
"--data-dir",
@ -155,46 +184,28 @@ def cli(ctx: click.Context, data_dir: str) -> None:
@cli.command()
@listing_filter_options
@click.option("--full", is_flag=True, help="Include images and floorplan detection")
@click.option(
"--include-processing",
"-p",
is_flag=True,
help="Also download images and run floorplan OCR detection",
)
@click.pass_context
def dump_listings(
ctx: click.Context,
full: bool,
district: list[str],
min_bedrooms: int,
max_bedrooms: int,
min_price: int,
max_price: int,
type: str,
furnish_types: list[str],
available_from: datetime | None,
last_seen_days: int,
min_sqm: int | None = None,
query_parameters: QueryParameters,
include_processing: bool,
) -> None:
"""Fetch listings from Rightmove API."""
data_dir: pathlib.Path = ctx.obj["data_dir"]
repository: ListingRepository = ctx.obj["repository"]
query_parameters = build_query_parameters(
type=type,
district=district,
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
min_price=min_price,
max_price=max_price,
furnish_types=furnish_types,
available_from=available_from,
last_seen_days=last_seen_days,
min_sqm=min_sqm,
)
click.echo(f"Fetching listings with parameters: {query_parameters}")
result = asyncio.run(
listing_service.refresh_listings(
repository,
query_parameters,
full=full,
full=include_processing,
async_mode=False,
)
)
@ -240,14 +251,14 @@ def detect_floorplan(ctx: click.Context) -> None:
@click.option(
"--travel-mode",
"-m",
help="Travel mode for routing",
help="Travel mode for routing (e.g. transit, driving, walking, bicycling)",
type=click.Choice(TravelMode.__members__.keys(), case_sensitive=False),
required=True,
)
@click.option(
"--limit",
"-l",
help="Limit the number of listings to process",
help="Maximum number of listings to calculate routes for",
type=click.IntRange(min=1),
default=1,
)
@ -293,33 +304,11 @@ def routing(
def export_csv(
ctx: click.Context,
output_file: str,
district: list[str],
min_bedrooms: int,
max_bedrooms: int,
min_price: int,
max_price: int,
type: str,
furnish_types: list[str],
available_from: datetime | None,
last_seen_days: int,
min_sqm: int | None = None,
query_parameters: QueryParameters,
) -> None:
"""Export listings to CSV file."""
repository: ListingRepository = ctx.obj["repository"]
query_parameters = build_query_parameters(
type=type,
district=district,
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
min_price=min_price,
max_price=max_price,
furnish_types=furnish_types,
available_from=available_from,
last_seen_days=last_seen_days,
min_sqm=min_sqm,
)
click.echo(f"Exporting to {output_file}")
result = asyncio.run(
@ -346,33 +335,11 @@ def export_csv(
def export_immoweb(
ctx: click.Context,
output_file: str,
district: list[str],
min_bedrooms: int,
max_bedrooms: int,
min_price: int,
max_price: int,
type: str,
furnish_types: list[str],
available_from: datetime | None,
last_seen_days: int,
min_sqm: int | None = None,
query_parameters: QueryParameters,
) -> None:
"""Export listings to GeoJSON file for map visualization."""
repository: ListingRepository = ctx.obj["repository"]
query_parameters = build_query_parameters(
type=type,
district=district,
min_bedrooms=min_bedrooms,
max_bedrooms=max_bedrooms,
min_price=min_price,
max_price=max_price,
furnish_types=furnish_types,
available_from=available_from,
last_seen_days=last_seen_days,
min_sqm=min_sqm,
)
click.echo(f"Exporting to {output_file}")
result = asyncio.run(