Enables POI management and distance calculation from the command line, following the existing Click command patterns with asyncio.run().
474 lines · 13 KiB · Python
"""CLI entry point for the Real Estate Crawler."""
|
|
import asyncio
|
|
from datetime import datetime
|
|
import os
|
|
import pathlib
|
|
from typing import Callable, ParamSpec, TypeVar
|
|
import click
|
|
|
|
from models.listing import FurnishType, ListingType, QueryParameters
|
|
from data_access import Listing
|
|
from rec.routing import API_KEY_ENVIRONMENT_VARIABLE, TravelMode
|
|
from repositories.listing_repository import ListingRepository
|
|
from functools import wraps
|
|
from database import engine
|
|
from services import (
|
|
listing_service,
|
|
export_service,
|
|
district_service,
|
|
poi_service,
|
|
)
|
|
from repositories.poi_repository import POIRepository
|
|
from repositories.user_repository import UserRepository
|
|
|
|
P = ParamSpec("P")
|
|
R = TypeVar("R")
|
|
|
|
|
|
def build_query_parameters(
    type: str,
    district: list[str] | tuple[str, ...] | None,
    min_bedrooms: int,
    max_bedrooms: int,
    min_price: int,
    max_price: int,
    furnish_types: list[str] | tuple[str, ...],
    available_from: datetime | None,
    last_seen_days: int,
    min_sqm: int | None = None,
    radius: int = 0,
    page_size: int = 500,
    max_days_since_added: int = 14,
) -> QueryParameters:
    """Translate raw CLI option values into a ``QueryParameters`` object.

    ``type`` and each entry of ``furnish_types`` are resolved by member name
    against ``ListingType`` and ``FurnishType`` respectively. A falsy
    ``district`` becomes an empty set and a falsy ``furnish_types`` becomes
    ``None``; every other argument is forwarded unchanged.
    """
    # Subscripting the enum class looks the member up by its name.
    resolved_listing_type = ListingType[type]

    # None and empty sequences both collapse to an empty set of districts.
    resolved_districts = set(district or ())

    resolved_furnish_types = None
    if furnish_types:
        resolved_furnish_types = [FurnishType[name] for name in furnish_types]

    return QueryParameters(
        listing_type=resolved_listing_type,
        district_names=resolved_districts,
        min_bedrooms=min_bedrooms,
        max_bedrooms=max_bedrooms,
        min_price=min_price,
        max_price=max_price,
        furnish_types=resolved_furnish_types,
        let_date_available_from=available_from,
        last_seen_days=last_seen_days,
        min_sqm=min_sqm,
        radius=radius,
        page_size=page_size,
        max_days_since_added=max_days_since_added,
    )
|
|
|
|
|
|
def listing_filter_options(func: Callable[P, R]) -> Callable[P, R]:
    """Decorator that adds common listing filter options and builds QueryParameters.

    The wrapped function receives a `query_parameters: QueryParameters` kwarg
    instead of individual filter values.

    Raises (when the decorated command is invoked):
        click.UsageError: if ``--min-bedrooms`` is greater than
            ``--max-bedrooms`` or ``--min-price`` is greater than
            ``--max-price``.
    """

    # Decorators are applied bottom-up, so the options appear in ``--help`` in
    # the order they are written here.
    @click.option(
        "--type",
        "-t",
        help="Type of listing to scrape (BUY or RENT)",
        type=click.Choice(
            # Materialise the member names: Choice expects a sequence, not a
            # dict view.
            list(ListingType.__members__),
            case_sensitive=False,
        ),
        required=True,
    )
    @click.option(
        "--min-bedrooms",
        default=1,
        help="Minimum number of bedrooms",
        type=click.IntRange(min=1),
    )
    @click.option(
        "--max-bedrooms",
        default=10,
        help="Maximum number of bedrooms",
        type=click.IntRange(min=1, max=10),
    )
    @click.option(
        "--min-price",
        default=0,
        help="Minimum price in GBP",
        type=click.IntRange(min=0),
    )
    @click.option(
        "--max-price",
        default=999_999,
        help="Maximum price in GBP",
        type=click.IntRange(min=0),
    )
    @click.option(
        "--district",
        default=None,
        help="District to filter by (can be repeated for multiple districts)",
        # The list of valid districts is fetched once, when the decorator is applied.
        type=click.Choice(district_service.get_district_names(), case_sensitive=False),
        multiple=True,
    )
    @click.option(
        "--furnish-types",
        "-f",
        help="Furnish type filter for rented listings (can be repeated)",
        type=click.Choice(
            [furnish_type.name for furnish_type in FurnishType.__members__.values()],
            case_sensitive=False,
        ),
        multiple=True,
    )
    @click.option(
        "--available-from",
        help="Only include listings available from this date (format: YYYY-MM-DD)",
        default=None,
        type=click.DateTime(),
    )
    @click.option(
        "--last-seen-days",
        help="Only include listings seen in the last N days",
        default=14,
        type=int,
    )
    @click.option(
        "--min-sqm",
        help="Minimum square meters for the listing",
        default=None,
        type=int,
    )
    @wraps(func)
    def wrapper(
        *args: P.args,
        type: str,
        district: tuple[str, ...],
        min_bedrooms: int,
        max_bedrooms: int,
        min_price: int,
        max_price: int,
        furnish_types: tuple[str, ...],
        available_from: datetime | None,
        last_seen_days: int,
        min_sqm: int | None,
        **kwargs: P.kwargs,
    ) -> R:
        # Each bound is range-checked on its own by IntRange, but nothing
        # stops the lower bound from exceeding the upper one. Reject that
        # combination up front instead of silently running an impossible query.
        if min_bedrooms > max_bedrooms:
            raise click.UsageError(
                f"--min-bedrooms ({min_bedrooms}) cannot be greater than "
                f"--max-bedrooms ({max_bedrooms})."
            )
        if min_price > max_price:
            raise click.UsageError(
                f"--min-price ({min_price}) cannot be greater than "
                f"--max-price ({max_price})."
            )

        query_parameters = build_query_parameters(
            type=type,
            district=district,
            min_bedrooms=min_bedrooms,
            max_bedrooms=max_bedrooms,
            min_price=min_price,
            max_price=max_price,
            furnish_types=furnish_types,
            available_from=available_from,
            last_seen_days=last_seen_days,
            min_sqm=min_sqm,
        )
        # The individual filter values are consumed here; the wrapped command
        # only ever sees the assembled QueryParameters.
        return func(*args, query_parameters=query_parameters, **kwargs)

    return wrapper
|
|
|
|
|
|
@click.group()
@click.option(
    "--data-dir",
    default=pathlib.Path("data/rs/"),
    help="Data directory for storing listings",
    type=click.Path(
        writable=True,
        file_okay=False,
        dir_okay=True,
        resolve_path=True,
    ),
)
@click.pass_context
def cli(ctx: click.Context, data_dir: str) -> None:
    # Root command group. No docstring is added here because Click would
    # display it as the group's help text.
    #
    # ``ensure_object`` returns the context's dict (creating it if needed);
    # subcommands read the entries stored below via ``ctx.obj``.
    shared_state = ctx.ensure_object(dict)
    shared_state["data_dir"] = pathlib.Path(data_dir)
    shared_state["repository"] = ListingRepository(engine=engine)
|
|
|
|
|
|
@cli.command()
@listing_filter_options
@click.option(
    "--include-processing",
    "-p",
    is_flag=True,
    help="Also download images and run floorplan OCR detection",
)
@click.pass_context
def dump_listings(
    ctx: click.Context,
    query_parameters: QueryParameters,
    include_processing: bool,
) -> None:
    """Fetch listings from Rightmove API."""
    # The repository is created by the root `cli` group and shared via ctx.obj.
    repo: ListingRepository = ctx.obj["repository"]

    click.echo(f"Fetching listings with parameters: {query_parameters}")

    # Build the service call, then run it to completion with asyncio.run().
    refresh_call = listing_service.refresh_listings(
        repo,
        query_parameters,
        full=include_processing,
        async_mode=False,
    )
    outcome = asyncio.run(refresh_call)

    click.echo(outcome.message)
|
|
|
|
|
|
@cli.command()
@click.pass_context
def dump_images(ctx: click.Context) -> None:
    """Download floorplan images for all listings."""
    # Both values are placed in ctx.obj by the root `cli` group.
    shared = ctx.obj
    target_dir: pathlib.Path = shared["data_dir"]
    repo: ListingRepository = shared["repository"]

    click.echo(f"Downloading images to {target_dir}")

    download_call = listing_service.download_images(repo, target_dir)
    processed = asyncio.run(download_call)

    click.echo(f"Processed {processed} listings")
|
|
|
|
|
|
@cli.command()
@click.pass_context
def detect_floorplan(ctx: click.Context) -> None:
    """Run OCR on floorplan images to detect square meters."""
    # The repository is created by the root `cli` group and shared via ctx.obj.
    repo: ListingRepository = ctx.obj["repository"]

    click.echo("Running floorplan detection...")

    detection_call = listing_service.detect_floorplans(repo)
    processed = asyncio.run(detection_call)

    click.echo(f"Processed {processed} listings")
|
|
|
|
|
|
@cli.command()
@click.option(
    "--destination-address",
    "-d",
    help="Destination address for routing",
    required=True,
    type=click.STRING,
)
@click.option(
    "--travel-mode",
    "-m",
    help="Travel mode for routing (e.g. transit, driving, walking, bicycling)",
    # Materialise the member names: Choice expects a sequence, not a dict view.
    type=click.Choice(list(TravelMode.__members__), case_sensitive=False),
    required=True,
)
@click.option(
    "--limit",
    "-l",
    help="Maximum number of listings to calculate routes for",
    type=click.IntRange(min=1),
    default=1,
)
@click.pass_context
def routing(
    ctx: click.Context,
    destination_address: str,
    travel_mode: str,
    limit: int,
) -> None:
    """Calculate transit routes for listings."""
    # Raises click.ClickException when the routing API key is missing.
    repository: ListingRepository = ctx.obj["repository"]

    # An exported-but-empty (or whitespace-only) variable is as useless as a
    # missing one, so fail fast here instead of passing a blank key downstream.
    api_key = os.environ.get(API_KEY_ENVIRONMENT_VARIABLE, "")
    if not api_key.strip():
        raise click.ClickException(
            f"{API_KEY_ENVIRONMENT_VARIABLE} environment variable is not set."
        )

    click.echo(f"Calculating routes to '{destination_address}' for {limit} listings")

    count = asyncio.run(
        listing_service.calculate_routes(
            repository,
            destination_address,
            travel_mode,
            limit=limit,
        )
    )

    click.echo(f"Processed {count} listings")
|
|
|
|
|
|
@cli.command()
@click.option(
    "--output-file",
    "-O",
    help="Path to the output CSV file",
    required=True,
    type=click.Path(writable=True, file_okay=True, dir_okay=False, resolve_path=True),
)
@listing_filter_options
@click.pass_context
def export_csv(
    ctx: click.Context,
    output_file: str,
    query_parameters: QueryParameters,
) -> None:
    """Export listings to CSV file."""
    # The repository is created by the root `cli` group and shared via ctx.obj.
    repo: ListingRepository = ctx.obj["repository"]

    click.echo(f"Exporting to {output_file}")

    # The service is given a Path object rather than the raw option string.
    destination = pathlib.Path(output_file)
    export_call = export_service.export_to_csv(repo, destination, query_parameters)
    outcome = asyncio.run(export_call)

    click.echo(outcome.message)
|
|
|
|
|
|
@cli.command()
@click.option(
    "--output-file",
    "-O",
    help="Path to the output GeoJSON file",
    required=True,
    type=click.Path(writable=True, file_okay=True, dir_okay=False, resolve_path=True),
)
@listing_filter_options
@click.pass_context
def export_immoweb(
    ctx: click.Context,
    output_file: str,
    query_parameters: QueryParameters,
) -> None:
    """Export listings to GeoJSON file for map visualization."""
    # The repository is created by the root `cli` group and shared via ctx.obj.
    repo: ListingRepository = ctx.obj["repository"]

    click.echo(f"Exporting to {output_file}")

    # Keyword arguments are evaluated in the same order as before:
    # query_parameters first, then the output path.
    export_call = export_service.export_to_geojson(
        repo,
        query_parameters=query_parameters,
        output_path=pathlib.Path(output_file),
    )
    outcome = asyncio.run(export_call)

    click.echo(outcome.message)
|
|
|
|
|
|
@cli.command()
@click.pass_context
def populate_db(ctx: click.Context) -> None:
    """Populate database from filesystem data (legacy migration)."""
    # Both values are placed in ctx.obj by the root `cli` group.
    shared = ctx.obj
    source_dir: pathlib.Path = shared["data_dir"]
    repo: ListingRepository = shared["repository"]

    click.echo(f"Populating database from {source_dir}")

    # One `listing.json` is expected per immediate subdirectory of the data dir.
    listing_files = list(source_dir.glob("*/listing.json"))
    listings = Listing.get_all_listings(listing_files)

    asyncio.run(repo.upsert_listings_legacy(listings))

    click.echo(f"Imported {len(listings)} listings")
|
|
|
|
|
|
@cli.command()
def list_districts() -> None:
    """List all available districts."""
    all_districts = district_service.get_all_districts()
    click.echo(f"Available districts ({len(all_districts)}):")

    # Only the keys (district names) are printed, in sorted order.
    for district_name in sorted(all_districts):
        click.echo(f" - {district_name}")
|
|
|
|
|
|
@cli.command()
@click.option("--name", required=True, help="Name for the POI (e.g., 'Office')")
@click.option("--address", required=True, help="Human-readable address")
@click.option(
    "--lat",
    required=True,
    # Reject coordinates outside the valid latitude range at parse time.
    type=click.FloatRange(min=-90.0, max=90.0),
    help="Latitude",
)
@click.option(
    "--lon",
    required=True,
    # Reject coordinates outside the valid longitude range at parse time.
    type=click.FloatRange(min=-180.0, max=180.0),
    help="Longitude",
)
@click.option("--user-email", required=True, help="User email to associate POI with")
def add_poi(name: str, address: str, lat: float, lon: float, user_email: str) -> None:
    """Create a Point of Interest."""
    # Look the owner up by e-mail; a user record is created on the fly when
    # none exists yet, so the POI can always be attached to someone.
    user_repo = UserRepository(engine)
    db_user = user_repo.get_user_by_email(user_email)
    if db_user is None:
        db_user = user_repo.create_user(user_email)

    poi_repo = POIRepository(engine)
    result = poi_service.create_poi(
        poi_repo,
        user_id=db_user.id,  # type: ignore[arg-type]
        name=name,
        address=address,
        latitude=lat,
        longitude=lon,
    )
    click.echo(f"Created POI: {result.poi.name} (id={result.poi.id})")
|
|
|
|
|
|
@cli.command()
@click.option("--user-email", required=True, help="User email to list POIs for")
def list_pois(user_email: str) -> None:
    """List POIs for a user."""
    owner = UserRepository(engine).get_user_by_email(user_email)
    if owner is None:
        # Unknown user: report it and stop without raising.
        click.echo(f"User '{user_email}' not found")
        return

    user_pois = poi_service.get_user_pois(
        POIRepository(engine),
        owner.id,  # type: ignore[arg-type]
    )
    if not user_pois:
        click.echo("No POIs found")
        return

    # One line per POI: id, name, address and coordinates.
    for poi in user_pois:
        click.echo(f" [{poi.id}] {poi.name} - {poi.address} ({poi.latitude}, {poi.longitude})")
|
|
|
|
|
|
@cli.command()
@click.option("--poi-id", required=True, type=int, help="POI ID to calculate distances for")
@click.option(
    "--travel-modes",
    required=True,
    help="Comma-separated travel modes (WALK, BICYCLE, TRANSIT)",
)
@click.option(
    "--listing-type",
    required=True,
    # Materialise the member names: Choice expects a sequence, not a dict view.
    type=click.Choice(list(ListingType.__members__), case_sensitive=False),
    help="Listing type (BUY or RENT)",
)
def calculate_poi(poi_id: int, travel_modes: str, listing_type: str) -> None:
    """Calculate distances from listings to a POI."""
    # Raises click.BadParameter when --travel-modes contains no usable entry.
    # Imported lazily, inside the command, as in the original implementation.
    from services.poi_distance_calculator import calculate_poi_distances as calc
    from config.routing_config import RoutingConfig

    # Normalise the comma-separated list: trim and upper-case each entry, drop
    # blanks left by stray commas ("WALK," or "WALK,,TRANSIT"), and remove
    # duplicates while keeping the order the user typed (dict keys are ordered).
    stripped_modes = (raw_mode.strip() for raw_mode in travel_modes.split(","))
    modes = list(dict.fromkeys(mode.upper() for mode in stripped_modes if mode))
    if not modes:
        raise click.BadParameter(
            "at least one travel mode is required",
            param_hint="--travel-modes",
        )

    poi_repo = POIRepository(engine)
    poi = poi_repo.get_poi_by_id(poi_id)
    if poi is None:
        click.echo(f"POI {poi_id} not found")
        return

    listing_repo = ListingRepository(engine=engine)
    lt = ListingType[listing_type]

    click.echo(f"Calculating {modes} distances for POI '{poi.name}' ({lt.value} listings)...")

    total = asyncio.run(
        calc(
            listing_repo=listing_repo,
            poi_repo=poi_repo,
            poi=poi,
            travel_modes=modes,
            listing_type=lt,
            config=RoutingConfig.from_env(),
        )
    )

    click.echo(f"Computed {total} distances")
|
|
|
|
|
|
# Run the Click command group only when this file is executed as a script.
if __name__ == "__main__":
    cli()
|