migrate routing command to use the models and store data there

This commit is contained in:
Viktor Barzin 2025-06-08 11:45:05 +00:00
parent 325823e631
commit 80c335ba04
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
6 changed files with 194 additions and 108 deletions

View file

@ -1,45 +1,67 @@
from data_access import Listing from models.listing import DestinationMode, Route, RouteLegStep
from tqdm import tqdm from repositories.listing_repository import ListingRepository
from tqdm.asyncio import tqdm
from rec import routing from rec import routing
from models import Listing
async def calculate_route( async def calculate_route(
listing_paths: list[str], repository: ListingRepository,
destination_address: str, destination_address: str,
travel_mode: routing.TravelMode, travel_mode: routing.TravelMode,
limit: int | None = None,
) -> None: ) -> None:
listings = await repository.get_listings()
listings = Listing.get_all_listings(listing_paths) if limit is not None:
listings = listings[:limit]
# reduce listings to everything within 7 miles destimation_mode = DestinationMode(destination_address, travel_mode)
filtered_listings = [] updated_listings = await tqdm.gather(
for listing in listings: *[update_routing_info(listing, destimation_mode) for listing in listings],
print(f"Processing {listing.identifier}") total=len(listings),
if listing.isRemoved: desc="Updating routing info",
print(f"Removed-Skip: Skipping {listing.identifier} " "is already removed.") )
continue await repository.upsert_listings(
sqm_ocr = await listing.sqm_ocr() [listing for listing in updated_listings if listing is not None]
if sqm_ocr is None or sqm_ocr < 30 or sqm_ocr > 200: )
print(
(
f"Floorplan-Skip: Skipping {listing.identifier} as " async def update_routing_info(
f"sqm_ocr is {sqm_ocr}" listing: Listing, destination_mode: DestinationMode
) -> Listing | None:
if listing.routing_info.get(destination_mode) is not None:
# already calculated, do not recompute to save API calls
return None
routes_data = routing.transit_route(
listing.latitude,
listing.longtitude,
destination_mode.destination_address,
destination_mode.travel_mode,
)
route_data = routes_data["routes"][0]
routes = []
for route_data in routes_data["routes"]:
duration_s = int(route_data["duration"].split("s")[0])
route = Route(
legs=[
RouteLegStep(
distance_meters=step_data["distanceMeters"],
duration_s=int(step_data["staticDuration"].split("s")[0]),
travel_mode=routing.TravelMode(step_data["travelMode"]),
) )
) for step_data in route_data["legs"][0]["steps"]
continue ],
filtered_listings.append(listing) distance_meters=route_data["distanceMeters"],
duration_s=duration_s,
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
for listing in tqdm(filtered_listings):
listing.calculate_route(
destination_address,
travel_mode,
recalculate=False,
) )
traveltime = listing.travel_time(destination_address, travel_mode)[0] routes.append(route)
duration_minutes = traveltime["duration"] / 60.0 listing.routing_info_json = listing.serialize_routing_info(
tqdm.write(f"{listing.identifier} {duration_minutes}") {**listing.routing_info, **{destination_mode: routes}}
)
return listing
# async def geocode_address( # async def geocode_address(

View file

@ -1,8 +1,8 @@
"""add more fields to tables """add more fields to tables
Revision ID: b78e1ed31eed Revision ID: 72b7410ff3e6
Revises: Revises:
Create Date: 2025-06-06 19:50:09.773676 Create Date: 2025-06-08 10:56:22.448446
""" """
@ -14,7 +14,7 @@ import sqlmodel
# revision identifiers, used by Alembic. # revision identifiers, used by Alembic.
revision: str = "b78e1ed31eed" revision: str = "72b7410ff3e6"
down_revision: Union[str, None] = None down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None
@ -30,6 +30,9 @@ def upgrade() -> None:
sa.Column("number_of_bedrooms", sa.Integer(), nullable=False), sa.Column("number_of_bedrooms", sa.Integer(), nullable=False),
sa.Column("square_meters", sa.Float(), nullable=True), sa.Column("square_meters", sa.Float(), nullable=True),
sa.Column("agency", sqlmodel.sql.sqltypes.AutoString(), nullable=True), sa.Column("agency", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column(
"council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True
),
sa.Column("longtitude", sa.Float(), nullable=False), sa.Column("longtitude", sa.Float(), nullable=False),
sa.Column("latitude", sa.Float(), nullable=False), sa.Column("latitude", sa.Float(), nullable=False),
sa.Column("price_history", sa.JSON(), nullable=False), sa.Column("price_history", sa.JSON(), nullable=False),
@ -38,11 +41,10 @@ def upgrade() -> None:
), ),
sa.Column("last_seen", sa.DateTime(), nullable=False), sa.Column("last_seen", sa.DateTime(), nullable=False),
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True), sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column("floorplan_image_paths", sa.JSON(), nullable=False),
sa.Column("additional_info", sa.JSON(), nullable=False), sa.Column("additional_info", sa.JSON(), nullable=False),
sa.Column("routing_info_json", sa.String(), nullable=True),
sa.Column("service_charge", sa.Float(), nullable=True), sa.Column("service_charge", sa.Float(), nullable=True),
sa.Column(
"council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True
),
sa.Column("lease_left", sa.Integer(), nullable=True), sa.Column("lease_left", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("id"), sa.PrimaryKeyConstraint("id"),
) )
@ -64,7 +66,9 @@ def upgrade() -> None:
), ),
sa.Column("last_seen", sa.DateTime(), nullable=False), sa.Column("last_seen", sa.DateTime(), nullable=False),
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True), sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column("floorplan_image_paths", sa.JSON(), nullable=False),
sa.Column("additional_info", sa.JSON(), nullable=False), sa.Column("additional_info", sa.JSON(), nullable=False),
sa.Column("routing_info_json", sa.String(), nullable=True),
sa.Column("available_from", sa.DateTime(), nullable=True), sa.Column("available_from", sa.DateTime(), nullable=True),
sa.Column( sa.Column(
"furnish_type", "furnish_type",

View file

@ -1,34 +0,0 @@
"""add more fields to tables
Revision ID: 8a7accc583c9
Revises: b2ffa638aafc
Create Date: 2025-06-07 13:38:08.805386
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '8a7accc583c9'
down_revision: Union[str, None] = 'b2ffa638aafc'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('buylisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
op.add_column('rentlisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('rentlisting', 'floorplan_image_paths')
op.drop_column('buylisting', 'floorplan_image_paths')
# ### end Alembic commands ###

View file

@ -1,32 +0,0 @@
"""add more fields to tables
Revision ID: b2ffa638aafc
Revises: b78e1ed31eed
Create Date: 2025-06-07 12:18:28.963851
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'b2ffa638aafc'
down_revision: Union[str, None] = 'b78e1ed31eed'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###

View file

@ -118,7 +118,6 @@ def listing_filter_options(func):
def cli(ctx, data_dir: str): def cli(ctx, data_dir: str):
ctx.ensure_object(dict) ctx.ensure_object(dict)
ctx.obj["data_dir"] = data_dir ctx.obj["data_dir"] = data_dir
pass
@cli.command() @cli.command()
@ -218,12 +217,14 @@ def routing(
"Please set it to your API key for the routing service." "Please set it to your API key for the routing service."
) )
repository = ListingRepository(engine=engine)
asyncio.run( asyncio.run(
routing_module.calculate_route( routing_module.calculate_route(
listing_paths, repository,
destination_address, destination_address,
# destination_address_coordinates, # destination_address_coordinates,
TravelMode[travel_mode], TravelMode[travel_mode],
limit=limit,
) )
) )

View file

@ -1,9 +1,13 @@
from dataclasses import dataclass from __future__ import annotations
from datetime import datetime from dataclasses import asdict, dataclass
import dataclasses
from datetime import datetime, timedelta
import enum import enum
import json
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List from typing import Any, Dict, List
from sqlmodel import JSON, Column, Enum, SQLModel, Field, String, TypeDecorator from rec import routing
from sqlmodel import JSON, SQLModel, Field, String
@dataclass @dataclass
@ -13,6 +17,28 @@ class PriceHistoryItem:
price: float price: float
@dataclass(frozen=True)
class Route:
legs: list[RouteLegStep]
distance_meters: int
duration_s: int
@property
def duration(self) -> timedelta:
return timedelta(seconds=self.duration_s)
@dataclass(frozen=True)
class RouteLegStep:
distance_meters: int
duration_s: int
travel_mode: routing.TravelMode
@property
def duration(self) -> timedelta:
return timedelta(seconds=self.duration_s)
class ListingSite(enum.StrEnum): class ListingSite(enum.StrEnum):
RIGHTMOVE = "rightmove" RIGHTMOVE = "rightmove"
# ZOOPLA = "zoopla" # ZOOPLA = "zoopla"
@ -38,6 +64,69 @@ class Listing(SQLModel, table=False):
additional_info: Dict[str, Any] = Field( additional_info: Dict[str, Any] = Field(
default_factory=dict, sa_type=JSON, nullable=False default_factory=dict, sa_type=JSON, nullable=False
) )
routing_info_json: str = Field(
sa_type=String, nullable=True, default=None
) # Store as JSON string for simplicity
@property
def is_removed(self) -> bool:
return not self.additional_info["property"]["visible"]
@property
def routing_info(self) -> dict[DestinationMode, List[Route]]:
"""
Returns a list of DestinationMode objects from the routing_info_str.
"""
if not self.routing_info_json:
return {}
# TODO: move to a separate serializer class
json_data = json.loads(self.routing_info_json)
destimation_routes = {}
for destination_mode_str, routes_json in json_data.items():
destination_mode = DestinationMode(
destination_address=json.loads(destination_mode_str)[
"destination_address"
],
travel_mode=routing.TravelMode(
json.loads(destination_mode_str)["travel_mode"]
),
)
parsed_route = json.loads(routes_json[0])
routes = [
Route(
legs=[
RouteLegStep(
distance_meters=step["distance_meters"],
duration_s=step["duration_s"],
travel_mode=routing.TravelMode(step["travel_mode"]),
)
for step in parsed_route["legs"]
],
distance_meters=parsed_route["distance_meters"],
duration_s=int(parsed_route["duration_s"]),
)
]
destimation_routes[destination_mode] = routes
return destimation_routes
def serialize_routing_info(
self, routing_info: dict[DestinationMode, list[Route]]
) -> str:
"""
Serializes the routing_info to a JSON string.
"""
# TODO: move to a separate serializer class
# for destination_mode, routes in routing_info.items():
serialized = json.dumps(
{
json.dumps(dataclasses.asdict(destination_mode)): [
json.dumps(dataclasses.asdict(route)) for route in routes
]
for destination_mode, routes in routing_info.items()
}
)
return serialized
class FurnishType(enum.StrEnum): class FurnishType(enum.StrEnum):
@ -57,3 +146,39 @@ class BuyListing(Listing, table=True):
lease_left: int | None = Field( lease_left: int | None = Field(
default=None, nullable=True default=None, nullable=True
) # in years, e.g., 90, 80, etc. ) # in years, e.g., 90, 80, etc.
@dataclass(frozen=True)
class DestinationMode:
destination_address: str
travel_mode: routing.TravelMode
def __hash__(self) -> int:
return hash((self.destination_address, self.travel_mode))
# def to_dict(self) -> dict[str, str | routing.TravelMode]:
# return {
# "destination_address": self.destination_address,
# "travel_mode": self.travel_mode.value,
# }
# @classmethod
# def from_dict(cls, data: dict):
# return cls(
# destination_address=data["destination_address"],
# travel_mode=routing.TravelMode(data["travel_mode"]),
# )
# def __json__(self) -> dict[str, str | routing.TravelMode]:
# return {
# "destination_address": self.destination_address,
# "travel_mode": self.travel_mode.value,
# }
def __getstate__(self):
# This allows serializers to pick up a dict representation
return asdict(self)
def __iter__(self):
# Makes it behave like a dict when expected
return iter(asdict(self).items())