migrate routing command to use the models and store data there

This commit is contained in:
Viktor Barzin 2025-06-08 11:45:05 +00:00
parent 325823e631
commit 80c335ba04
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
6 changed files with 194 additions and 108 deletions

View file

@ -1,45 +1,67 @@
from data_access import Listing
from tqdm import tqdm
from models.listing import DestinationMode, Route, RouteLegStep
from repositories.listing_repository import ListingRepository
from tqdm.asyncio import tqdm
from rec import routing
from models import Listing
async def calculate_route(
listing_paths: list[str],
repository: ListingRepository,
destination_address: str,
travel_mode: routing.TravelMode,
limit: int | None = None,
) -> None:
listings = await repository.get_listings()
listings = Listing.get_all_listings(listing_paths)
if limit is not None:
listings = listings[:limit]
# reduce listings to everything within 7 miles
filtered_listings = []
for listing in listings:
print(f"Processing {listing.identifier}")
if listing.isRemoved:
print(f"Removed-Skip: Skipping {listing.identifier} " "is already removed.")
continue
sqm_ocr = await listing.sqm_ocr()
if sqm_ocr is None or sqm_ocr < 30 or sqm_ocr > 200:
print(
(
f"Floorplan-Skip: Skipping {listing.identifier} as "
f"sqm_ocr is {sqm_ocr}"
destimation_mode = DestinationMode(destination_address, travel_mode)
updated_listings = await tqdm.gather(
*[update_routing_info(listing, destimation_mode) for listing in listings],
total=len(listings),
desc="Updating routing info",
)
await repository.upsert_listings(
[listing for listing in updated_listings if listing is not None]
)
async def update_routing_info(
listing: Listing, destination_mode: DestinationMode
) -> Listing | None:
if listing.routing_info.get(destination_mode) is not None:
# already calculated, do not recompute to save API calls
return None
routes_data = routing.transit_route(
listing.latitude,
listing.longtitude,
destination_mode.destination_address,
destination_mode.travel_mode,
)
route_data = routes_data["routes"][0]
routes = []
for route_data in routes_data["routes"]:
duration_s = int(route_data["duration"].split("s")[0])
route = Route(
legs=[
RouteLegStep(
distance_meters=step_data["distanceMeters"],
duration_s=int(step_data["staticDuration"].split("s")[0]),
travel_mode=routing.TravelMode(step_data["travelMode"]),
)
)
continue
filtered_listings.append(listing)
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
for listing in tqdm(filtered_listings):
listing.calculate_route(
destination_address,
travel_mode,
recalculate=False,
for step_data in route_data["legs"][0]["steps"]
],
distance_meters=route_data["distanceMeters"],
duration_s=duration_s,
)
traveltime = listing.travel_time(destination_address, travel_mode)[0]
duration_minutes = traveltime["duration"] / 60.0
tqdm.write(f"{listing.identifier} {duration_minutes}")
routes.append(route)
listing.routing_info_json = listing.serialize_routing_info(
{**listing.routing_info, **{destination_mode: routes}}
)
return listing
# async def geocode_address(

View file

@ -1,8 +1,8 @@
"""add more fields to tables
Revision ID: b78e1ed31eed
Revision ID: 72b7410ff3e6
Revises:
Create Date: 2025-06-06 19:50:09.773676
Create Date: 2025-06-08 10:56:22.448446
"""
@ -14,7 +14,7 @@ import sqlmodel
# revision identifiers, used by Alembic.
revision: str = "b78e1ed31eed"
revision: str = "72b7410ff3e6"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
@ -30,6 +30,9 @@ def upgrade() -> None:
sa.Column("number_of_bedrooms", sa.Integer(), nullable=False),
sa.Column("square_meters", sa.Float(), nullable=True),
sa.Column("agency", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column(
"council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True
),
sa.Column("longtitude", sa.Float(), nullable=False),
sa.Column("latitude", sa.Float(), nullable=False),
sa.Column("price_history", sa.JSON(), nullable=False),
@ -38,11 +41,10 @@ def upgrade() -> None:
),
sa.Column("last_seen", sa.DateTime(), nullable=False),
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column("floorplan_image_paths", sa.JSON(), nullable=False),
sa.Column("additional_info", sa.JSON(), nullable=False),
sa.Column("routing_info_json", sa.String(), nullable=True),
sa.Column("service_charge", sa.Float(), nullable=True),
sa.Column(
"council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True
),
sa.Column("lease_left", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
@ -64,7 +66,9 @@ def upgrade() -> None:
),
sa.Column("last_seen", sa.DateTime(), nullable=False),
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column("floorplan_image_paths", sa.JSON(), nullable=False),
sa.Column("additional_info", sa.JSON(), nullable=False),
sa.Column("routing_info_json", sa.String(), nullable=True),
sa.Column("available_from", sa.DateTime(), nullable=True),
sa.Column(
"furnish_type",

View file

@ -1,34 +0,0 @@
"""add more fields to tables
Revision ID: 8a7accc583c9
Revises: b2ffa638aafc
Create Date: 2025-06-07 13:38:08.805386
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '8a7accc583c9'
down_revision: Union[str, None] = 'b2ffa638aafc'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('buylisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
op.add_column('rentlisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('rentlisting', 'floorplan_image_paths')
op.drop_column('buylisting', 'floorplan_image_paths')
# ### end Alembic commands ###

View file

@ -1,32 +0,0 @@
"""add more fields to tables
Revision ID: b2ffa638aafc
Revises: b78e1ed31eed
Create Date: 2025-06-07 12:18:28.963851
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'b2ffa638aafc'
down_revision: Union[str, None] = 'b78e1ed31eed'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###

View file

@ -118,7 +118,6 @@ def listing_filter_options(func):
def cli(ctx, data_dir: str):
ctx.ensure_object(dict)
ctx.obj["data_dir"] = data_dir
pass
@cli.command()
@ -218,12 +217,14 @@ def routing(
"Please set it to your API key for the routing service."
)
repository = ListingRepository(engine=engine)
asyncio.run(
routing_module.calculate_route(
listing_paths,
repository,
destination_address,
# destination_address_coordinates,
TravelMode[travel_mode],
limit=limit,
)
)

View file

@ -1,9 +1,13 @@
from dataclasses import dataclass
from datetime import datetime
from __future__ import annotations
from dataclasses import asdict, dataclass
import dataclasses
from datetime import datetime, timedelta
import enum
import json
from pathlib import Path
from typing import Any, Dict, List
from sqlmodel import JSON, Column, Enum, SQLModel, Field, String, TypeDecorator
from rec import routing
from sqlmodel import JSON, SQLModel, Field, String
@dataclass
@ -13,6 +17,28 @@ class PriceHistoryItem:
price: float
@dataclass(frozen=True)
class Route:
legs: list[RouteLegStep]
distance_meters: int
duration_s: int
@property
def duration(self) -> timedelta:
return timedelta(seconds=self.duration_s)
@dataclass(frozen=True)
class RouteLegStep:
distance_meters: int
duration_s: int
travel_mode: routing.TravelMode
@property
def duration(self) -> timedelta:
return timedelta(seconds=self.duration_s)
class ListingSite(enum.StrEnum):
RIGHTMOVE = "rightmove"
# ZOOPLA = "zoopla"
@ -38,6 +64,69 @@ class Listing(SQLModel, table=False):
additional_info: Dict[str, Any] = Field(
default_factory=dict, sa_type=JSON, nullable=False
)
routing_info_json: str = Field(
sa_type=String, nullable=True, default=None
) # Store as JSON string for simplicity
@property
def is_removed(self) -> bool:
return not self.additional_info["property"]["visible"]
@property
def routing_info(self) -> dict[DestinationMode, List[Route]]:
"""
Returns a list of DestinationMode objects from the routing_info_str.
"""
if not self.routing_info_json:
return {}
# TODO: move to a separate serializer class
json_data = json.loads(self.routing_info_json)
destimation_routes = {}
for destination_mode_str, routes_json in json_data.items():
destination_mode = DestinationMode(
destination_address=json.loads(destination_mode_str)[
"destination_address"
],
travel_mode=routing.TravelMode(
json.loads(destination_mode_str)["travel_mode"]
),
)
parsed_route = json.loads(routes_json[0])
routes = [
Route(
legs=[
RouteLegStep(
distance_meters=step["distance_meters"],
duration_s=step["duration_s"],
travel_mode=routing.TravelMode(step["travel_mode"]),
)
for step in parsed_route["legs"]
],
distance_meters=parsed_route["distance_meters"],
duration_s=int(parsed_route["duration_s"]),
)
]
destimation_routes[destination_mode] = routes
return destimation_routes
def serialize_routing_info(
self, routing_info: dict[DestinationMode, list[Route]]
) -> str:
"""
Serializes the routing_info to a JSON string.
"""
# TODO: move to a separate serializer class
# for destination_mode, routes in routing_info.items():
serialized = json.dumps(
{
json.dumps(dataclasses.asdict(destination_mode)): [
json.dumps(dataclasses.asdict(route)) for route in routes
]
for destination_mode, routes in routing_info.items()
}
)
return serialized
class FurnishType(enum.StrEnum):
@ -57,3 +146,39 @@ class BuyListing(Listing, table=True):
lease_left: int | None = Field(
default=None, nullable=True
) # in years, e.g., 90, 80, etc.
@dataclass(frozen=True)
class DestinationMode:
destination_address: str
travel_mode: routing.TravelMode
def __hash__(self) -> int:
return hash((self.destination_address, self.travel_mode))
# def to_dict(self) -> dict[str, str | routing.TravelMode]:
# return {
# "destination_address": self.destination_address,
# "travel_mode": self.travel_mode.value,
# }
# @classmethod
# def from_dict(cls, data: dict):
# return cls(
# destination_address=data["destination_address"],
# travel_mode=routing.TravelMode(data["travel_mode"]),
# )
# def __json__(self) -> dict[str, str | routing.TravelMode]:
# return {
# "destination_address": self.destination_address,
# "travel_mode": self.travel_mode.value,
# }
def __getstate__(self):
# This allows serializers to pick up a dict representation
return asdict(self)
def __iter__(self):
# Makes it behave like a dict when expected
return iter(asdict(self).items())