migrate routing command to use the models and store data there
This commit is contained in:
parent
325823e631
commit
80c335ba04
6 changed files with 194 additions and 108 deletions
|
|
@ -1,45 +1,67 @@
|
|||
from data_access import Listing
|
||||
from tqdm import tqdm
|
||||
from models.listing import DestinationMode, Route, RouteLegStep
|
||||
from repositories.listing_repository import ListingRepository
|
||||
from tqdm.asyncio import tqdm
|
||||
from rec import routing
|
||||
from models import Listing
|
||||
|
||||
|
||||
async def calculate_route(
|
||||
listing_paths: list[str],
|
||||
repository: ListingRepository,
|
||||
destination_address: str,
|
||||
travel_mode: routing.TravelMode,
|
||||
limit: int | None = None,
|
||||
) -> None:
|
||||
listings = await repository.get_listings()
|
||||
|
||||
listings = Listing.get_all_listings(listing_paths)
|
||||
if limit is not None:
|
||||
listings = listings[:limit]
|
||||
|
||||
# reduce listings to everything within 7 miles
|
||||
filtered_listings = []
|
||||
for listing in listings:
|
||||
print(f"Processing {listing.identifier}")
|
||||
if listing.isRemoved:
|
||||
print(f"Removed-Skip: Skipping {listing.identifier} " "is already removed.")
|
||||
continue
|
||||
sqm_ocr = await listing.sqm_ocr()
|
||||
if sqm_ocr is None or sqm_ocr < 30 or sqm_ocr > 200:
|
||||
print(
|
||||
(
|
||||
f"Floorplan-Skip: Skipping {listing.identifier} as "
|
||||
f"sqm_ocr is {sqm_ocr}"
|
||||
destimation_mode = DestinationMode(destination_address, travel_mode)
|
||||
updated_listings = await tqdm.gather(
|
||||
*[update_routing_info(listing, destimation_mode) for listing in listings],
|
||||
total=len(listings),
|
||||
desc="Updating routing info",
|
||||
)
|
||||
await repository.upsert_listings(
|
||||
[listing for listing in updated_listings if listing is not None]
|
||||
)
|
||||
|
||||
|
||||
async def update_routing_info(
|
||||
listing: Listing, destination_mode: DestinationMode
|
||||
) -> Listing | None:
|
||||
if listing.routing_info.get(destination_mode) is not None:
|
||||
# already calculated, do not recompute to save API calls
|
||||
return None
|
||||
|
||||
routes_data = routing.transit_route(
|
||||
listing.latitude,
|
||||
listing.longtitude,
|
||||
destination_mode.destination_address,
|
||||
destination_mode.travel_mode,
|
||||
)
|
||||
|
||||
route_data = routes_data["routes"][0]
|
||||
routes = []
|
||||
for route_data in routes_data["routes"]:
|
||||
duration_s = int(route_data["duration"].split("s")[0])
|
||||
route = Route(
|
||||
legs=[
|
||||
RouteLegStep(
|
||||
distance_meters=step_data["distanceMeters"],
|
||||
duration_s=int(step_data["staticDuration"].split("s")[0]),
|
||||
travel_mode=routing.TravelMode(step_data["travelMode"]),
|
||||
)
|
||||
)
|
||||
continue
|
||||
filtered_listings.append(listing)
|
||||
|
||||
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
|
||||
|
||||
for listing in tqdm(filtered_listings):
|
||||
listing.calculate_route(
|
||||
destination_address,
|
||||
travel_mode,
|
||||
recalculate=False,
|
||||
for step_data in route_data["legs"][0]["steps"]
|
||||
],
|
||||
distance_meters=route_data["distanceMeters"],
|
||||
duration_s=duration_s,
|
||||
)
|
||||
traveltime = listing.travel_time(destination_address, travel_mode)[0]
|
||||
duration_minutes = traveltime["duration"] / 60.0
|
||||
tqdm.write(f"{listing.identifier} {duration_minutes}")
|
||||
routes.append(route)
|
||||
listing.routing_info_json = listing.serialize_routing_info(
|
||||
{**listing.routing_info, **{destination_mode: routes}}
|
||||
)
|
||||
return listing
|
||||
|
||||
|
||||
# async def geocode_address(
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
"""add more fields to tables
|
||||
|
||||
Revision ID: b78e1ed31eed
|
||||
Revision ID: 72b7410ff3e6
|
||||
Revises:
|
||||
Create Date: 2025-06-06 19:50:09.773676
|
||||
Create Date: 2025-06-08 10:56:22.448446
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -14,7 +14,7 @@ import sqlmodel
|
|||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "b78e1ed31eed"
|
||||
revision: str = "72b7410ff3e6"
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
|
@ -30,6 +30,9 @@ def upgrade() -> None:
|
|||
sa.Column("number_of_bedrooms", sa.Integer(), nullable=False),
|
||||
sa.Column("square_meters", sa.Float(), nullable=True),
|
||||
sa.Column("agency", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
||||
sa.Column(
|
||||
"council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True
|
||||
),
|
||||
sa.Column("longtitude", sa.Float(), nullable=False),
|
||||
sa.Column("latitude", sa.Float(), nullable=False),
|
||||
sa.Column("price_history", sa.JSON(), nullable=False),
|
||||
|
|
@ -38,11 +41,10 @@ def upgrade() -> None:
|
|||
),
|
||||
sa.Column("last_seen", sa.DateTime(), nullable=False),
|
||||
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
||||
sa.Column("floorplan_image_paths", sa.JSON(), nullable=False),
|
||||
sa.Column("additional_info", sa.JSON(), nullable=False),
|
||||
sa.Column("routing_info_json", sa.String(), nullable=True),
|
||||
sa.Column("service_charge", sa.Float(), nullable=True),
|
||||
sa.Column(
|
||||
"council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True
|
||||
),
|
||||
sa.Column("lease_left", sa.Integer(), nullable=True),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
|
|
@ -64,7 +66,9 @@ def upgrade() -> None:
|
|||
),
|
||||
sa.Column("last_seen", sa.DateTime(), nullable=False),
|
||||
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
||||
sa.Column("floorplan_image_paths", sa.JSON(), nullable=False),
|
||||
sa.Column("additional_info", sa.JSON(), nullable=False),
|
||||
sa.Column("routing_info_json", sa.String(), nullable=True),
|
||||
sa.Column("available_from", sa.DateTime(), nullable=True),
|
||||
sa.Column(
|
||||
"furnish_type",
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
"""add more fields to tables
|
||||
|
||||
Revision ID: 8a7accc583c9
|
||||
Revises: b2ffa638aafc
|
||||
Create Date: 2025-06-07 13:38:08.805386
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '8a7accc583c9'
|
||||
down_revision: Union[str, None] = 'b2ffa638aafc'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Upgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.add_column('buylisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
|
||||
op.add_column('rentlisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.drop_column('rentlisting', 'floorplan_image_paths')
|
||||
op.drop_column('buylisting', 'floorplan_image_paths')
|
||||
# ### end Alembic commands ###
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
"""add more fields to tables
|
||||
|
||||
Revision ID: b2ffa638aafc
|
||||
Revises: b78e1ed31eed
|
||||
Create Date: 2025-06-07 12:18:28.963851
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = 'b2ffa638aafc'
|
||||
down_revision: Union[str, None] = 'b78e1ed31eed'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Upgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
pass
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Downgrade schema."""
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
pass
|
||||
# ### end Alembic commands ###
|
||||
|
|
@ -118,7 +118,6 @@ def listing_filter_options(func):
|
|||
def cli(ctx, data_dir: str):
|
||||
ctx.ensure_object(dict)
|
||||
ctx.obj["data_dir"] = data_dir
|
||||
pass
|
||||
|
||||
|
||||
@cli.command()
|
||||
|
|
@ -218,12 +217,14 @@ def routing(
|
|||
"Please set it to your API key for the routing service."
|
||||
)
|
||||
|
||||
repository = ListingRepository(engine=engine)
|
||||
asyncio.run(
|
||||
routing_module.calculate_route(
|
||||
listing_paths,
|
||||
repository,
|
||||
destination_address,
|
||||
# destination_address_coordinates,
|
||||
TravelMode[travel_mode],
|
||||
limit=limit,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from __future__ import annotations
|
||||
from dataclasses import asdict, dataclass
|
||||
import dataclasses
|
||||
from datetime import datetime, timedelta
|
||||
import enum
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
from sqlmodel import JSON, Column, Enum, SQLModel, Field, String, TypeDecorator
|
||||
from rec import routing
|
||||
from sqlmodel import JSON, SQLModel, Field, String
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -13,6 +17,28 @@ class PriceHistoryItem:
|
|||
price: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Route:
|
||||
legs: list[RouteLegStep]
|
||||
distance_meters: int
|
||||
duration_s: int
|
||||
|
||||
@property
|
||||
def duration(self) -> timedelta:
|
||||
return timedelta(seconds=self.duration_s)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RouteLegStep:
|
||||
distance_meters: int
|
||||
duration_s: int
|
||||
travel_mode: routing.TravelMode
|
||||
|
||||
@property
|
||||
def duration(self) -> timedelta:
|
||||
return timedelta(seconds=self.duration_s)
|
||||
|
||||
|
||||
class ListingSite(enum.StrEnum):
|
||||
RIGHTMOVE = "rightmove"
|
||||
# ZOOPLA = "zoopla"
|
||||
|
|
@ -38,6 +64,69 @@ class Listing(SQLModel, table=False):
|
|||
additional_info: Dict[str, Any] = Field(
|
||||
default_factory=dict, sa_type=JSON, nullable=False
|
||||
)
|
||||
routing_info_json: str = Field(
|
||||
sa_type=String, nullable=True, default=None
|
||||
) # Store as JSON string for simplicity
|
||||
|
||||
@property
|
||||
def is_removed(self) -> bool:
|
||||
return not self.additional_info["property"]["visible"]
|
||||
|
||||
@property
|
||||
def routing_info(self) -> dict[DestinationMode, List[Route]]:
|
||||
"""
|
||||
Returns a list of DestinationMode objects from the routing_info_str.
|
||||
"""
|
||||
if not self.routing_info_json:
|
||||
return {}
|
||||
|
||||
# TODO: move to a separate serializer class
|
||||
json_data = json.loads(self.routing_info_json)
|
||||
destimation_routes = {}
|
||||
for destination_mode_str, routes_json in json_data.items():
|
||||
destination_mode = DestinationMode(
|
||||
destination_address=json.loads(destination_mode_str)[
|
||||
"destination_address"
|
||||
],
|
||||
travel_mode=routing.TravelMode(
|
||||
json.loads(destination_mode_str)["travel_mode"]
|
||||
),
|
||||
)
|
||||
parsed_route = json.loads(routes_json[0])
|
||||
routes = [
|
||||
Route(
|
||||
legs=[
|
||||
RouteLegStep(
|
||||
distance_meters=step["distance_meters"],
|
||||
duration_s=step["duration_s"],
|
||||
travel_mode=routing.TravelMode(step["travel_mode"]),
|
||||
)
|
||||
for step in parsed_route["legs"]
|
||||
],
|
||||
distance_meters=parsed_route["distance_meters"],
|
||||
duration_s=int(parsed_route["duration_s"]),
|
||||
)
|
||||
]
|
||||
destimation_routes[destination_mode] = routes
|
||||
return destimation_routes
|
||||
|
||||
def serialize_routing_info(
|
||||
self, routing_info: dict[DestinationMode, list[Route]]
|
||||
) -> str:
|
||||
"""
|
||||
Serializes the routing_info to a JSON string.
|
||||
"""
|
||||
# TODO: move to a separate serializer class
|
||||
# for destination_mode, routes in routing_info.items():
|
||||
serialized = json.dumps(
|
||||
{
|
||||
json.dumps(dataclasses.asdict(destination_mode)): [
|
||||
json.dumps(dataclasses.asdict(route)) for route in routes
|
||||
]
|
||||
for destination_mode, routes in routing_info.items()
|
||||
}
|
||||
)
|
||||
return serialized
|
||||
|
||||
|
||||
class FurnishType(enum.StrEnum):
|
||||
|
|
@ -57,3 +146,39 @@ class BuyListing(Listing, table=True):
|
|||
lease_left: int | None = Field(
|
||||
default=None, nullable=True
|
||||
) # in years, e.g., 90, 80, etc.
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DestinationMode:
|
||||
destination_address: str
|
||||
travel_mode: routing.TravelMode
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash((self.destination_address, self.travel_mode))
|
||||
|
||||
# def to_dict(self) -> dict[str, str | routing.TravelMode]:
|
||||
# return {
|
||||
# "destination_address": self.destination_address,
|
||||
# "travel_mode": self.travel_mode.value,
|
||||
# }
|
||||
|
||||
# @classmethod
|
||||
# def from_dict(cls, data: dict):
|
||||
# return cls(
|
||||
# destination_address=data["destination_address"],
|
||||
# travel_mode=routing.TravelMode(data["travel_mode"]),
|
||||
# )
|
||||
|
||||
# def __json__(self) -> dict[str, str | routing.TravelMode]:
|
||||
# return {
|
||||
# "destination_address": self.destination_address,
|
||||
# "travel_mode": self.travel_mode.value,
|
||||
# }
|
||||
|
||||
def __getstate__(self):
|
||||
# This allows serializers to pick up a dict representation
|
||||
return asdict(self)
|
||||
|
||||
def __iter__(self):
|
||||
# Makes it behave like a dict when expected
|
||||
return iter(asdict(self).items())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue