migrate routing command to use the models and store data there
This commit is contained in:
parent
325823e631
commit
80c335ba04
6 changed files with 194 additions and 108 deletions
|
|
@ -1,45 +1,67 @@
|
||||||
from data_access import Listing
|
from models.listing import DestinationMode, Route, RouteLegStep
|
||||||
from tqdm import tqdm
|
from repositories.listing_repository import ListingRepository
|
||||||
|
from tqdm.asyncio import tqdm
|
||||||
from rec import routing
|
from rec import routing
|
||||||
|
from models import Listing
|
||||||
|
|
||||||
|
|
||||||
async def calculate_route(
|
async def calculate_route(
|
||||||
listing_paths: list[str],
|
repository: ListingRepository,
|
||||||
destination_address: str,
|
destination_address: str,
|
||||||
travel_mode: routing.TravelMode,
|
travel_mode: routing.TravelMode,
|
||||||
|
limit: int | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
listings = await repository.get_listings()
|
||||||
|
|
||||||
listings = Listing.get_all_listings(listing_paths)
|
if limit is not None:
|
||||||
|
listings = listings[:limit]
|
||||||
|
|
||||||
# reduce listings to everything within 7 miles
|
destimation_mode = DestinationMode(destination_address, travel_mode)
|
||||||
filtered_listings = []
|
updated_listings = await tqdm.gather(
|
||||||
for listing in listings:
|
*[update_routing_info(listing, destimation_mode) for listing in listings],
|
||||||
print(f"Processing {listing.identifier}")
|
total=len(listings),
|
||||||
if listing.isRemoved:
|
desc="Updating routing info",
|
||||||
print(f"Removed-Skip: Skipping {listing.identifier} " "is already removed.")
|
)
|
||||||
continue
|
await repository.upsert_listings(
|
||||||
sqm_ocr = await listing.sqm_ocr()
|
[listing for listing in updated_listings if listing is not None]
|
||||||
if sqm_ocr is None or sqm_ocr < 30 or sqm_ocr > 200:
|
)
|
||||||
print(
|
|
||||||
(
|
|
||||||
f"Floorplan-Skip: Skipping {listing.identifier} as "
|
async def update_routing_info(
|
||||||
f"sqm_ocr is {sqm_ocr}"
|
listing: Listing, destination_mode: DestinationMode
|
||||||
|
) -> Listing | None:
|
||||||
|
if listing.routing_info.get(destination_mode) is not None:
|
||||||
|
# already calculated, do not recompute to save API calls
|
||||||
|
return None
|
||||||
|
|
||||||
|
routes_data = routing.transit_route(
|
||||||
|
listing.latitude,
|
||||||
|
listing.longtitude,
|
||||||
|
destination_mode.destination_address,
|
||||||
|
destination_mode.travel_mode,
|
||||||
|
)
|
||||||
|
|
||||||
|
route_data = routes_data["routes"][0]
|
||||||
|
routes = []
|
||||||
|
for route_data in routes_data["routes"]:
|
||||||
|
duration_s = int(route_data["duration"].split("s")[0])
|
||||||
|
route = Route(
|
||||||
|
legs=[
|
||||||
|
RouteLegStep(
|
||||||
|
distance_meters=step_data["distanceMeters"],
|
||||||
|
duration_s=int(step_data["staticDuration"].split("s")[0]),
|
||||||
|
travel_mode=routing.TravelMode(step_data["travelMode"]),
|
||||||
)
|
)
|
||||||
)
|
for step_data in route_data["legs"][0]["steps"]
|
||||||
continue
|
],
|
||||||
filtered_listings.append(listing)
|
distance_meters=route_data["distanceMeters"],
|
||||||
|
duration_s=duration_s,
|
||||||
print(f"Filtered listings from {len(listings)} to {len(filtered_listings)}")
|
|
||||||
|
|
||||||
for listing in tqdm(filtered_listings):
|
|
||||||
listing.calculate_route(
|
|
||||||
destination_address,
|
|
||||||
travel_mode,
|
|
||||||
recalculate=False,
|
|
||||||
)
|
)
|
||||||
traveltime = listing.travel_time(destination_address, travel_mode)[0]
|
routes.append(route)
|
||||||
duration_minutes = traveltime["duration"] / 60.0
|
listing.routing_info_json = listing.serialize_routing_info(
|
||||||
tqdm.write(f"{listing.identifier} {duration_minutes}")
|
{**listing.routing_info, **{destination_mode: routes}}
|
||||||
|
)
|
||||||
|
return listing
|
||||||
|
|
||||||
|
|
||||||
# async def geocode_address(
|
# async def geocode_address(
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
"""add more fields to tables
|
"""add more fields to tables
|
||||||
|
|
||||||
Revision ID: b78e1ed31eed
|
Revision ID: 72b7410ff3e6
|
||||||
Revises:
|
Revises:
|
||||||
Create Date: 2025-06-06 19:50:09.773676
|
Create Date: 2025-06-08 10:56:22.448446
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -14,7 +14,7 @@ import sqlmodel
|
||||||
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision: str = "b78e1ed31eed"
|
revision: str = "72b7410ff3e6"
|
||||||
down_revision: Union[str, None] = None
|
down_revision: Union[str, None] = None
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
@ -30,6 +30,9 @@ def upgrade() -> None:
|
||||||
sa.Column("number_of_bedrooms", sa.Integer(), nullable=False),
|
sa.Column("number_of_bedrooms", sa.Integer(), nullable=False),
|
||||||
sa.Column("square_meters", sa.Float(), nullable=True),
|
sa.Column("square_meters", sa.Float(), nullable=True),
|
||||||
sa.Column("agency", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
sa.Column("agency", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
||||||
|
sa.Column(
|
||||||
|
"council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True
|
||||||
|
),
|
||||||
sa.Column("longtitude", sa.Float(), nullable=False),
|
sa.Column("longtitude", sa.Float(), nullable=False),
|
||||||
sa.Column("latitude", sa.Float(), nullable=False),
|
sa.Column("latitude", sa.Float(), nullable=False),
|
||||||
sa.Column("price_history", sa.JSON(), nullable=False),
|
sa.Column("price_history", sa.JSON(), nullable=False),
|
||||||
|
|
@ -38,11 +41,10 @@ def upgrade() -> None:
|
||||||
),
|
),
|
||||||
sa.Column("last_seen", sa.DateTime(), nullable=False),
|
sa.Column("last_seen", sa.DateTime(), nullable=False),
|
||||||
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
||||||
|
sa.Column("floorplan_image_paths", sa.JSON(), nullable=False),
|
||||||
sa.Column("additional_info", sa.JSON(), nullable=False),
|
sa.Column("additional_info", sa.JSON(), nullable=False),
|
||||||
|
sa.Column("routing_info_json", sa.String(), nullable=True),
|
||||||
sa.Column("service_charge", sa.Float(), nullable=True),
|
sa.Column("service_charge", sa.Float(), nullable=True),
|
||||||
sa.Column(
|
|
||||||
"council_tax_band", sqlmodel.sql.sqltypes.AutoString(), nullable=True
|
|
||||||
),
|
|
||||||
sa.Column("lease_left", sa.Integer(), nullable=True),
|
sa.Column("lease_left", sa.Integer(), nullable=True),
|
||||||
sa.PrimaryKeyConstraint("id"),
|
sa.PrimaryKeyConstraint("id"),
|
||||||
)
|
)
|
||||||
|
|
@ -64,7 +66,9 @@ def upgrade() -> None:
|
||||||
),
|
),
|
||||||
sa.Column("last_seen", sa.DateTime(), nullable=False),
|
sa.Column("last_seen", sa.DateTime(), nullable=False),
|
||||||
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
sa.Column("photo_thumbnail", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
|
||||||
|
sa.Column("floorplan_image_paths", sa.JSON(), nullable=False),
|
||||||
sa.Column("additional_info", sa.JSON(), nullable=False),
|
sa.Column("additional_info", sa.JSON(), nullable=False),
|
||||||
|
sa.Column("routing_info_json", sa.String(), nullable=True),
|
||||||
sa.Column("available_from", sa.DateTime(), nullable=True),
|
sa.Column("available_from", sa.DateTime(), nullable=True),
|
||||||
sa.Column(
|
sa.Column(
|
||||||
"furnish_type",
|
"furnish_type",
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
"""add more fields to tables
|
|
||||||
|
|
||||||
Revision ID: 8a7accc583c9
|
|
||||||
Revises: b2ffa638aafc
|
|
||||||
Create Date: 2025-06-07 13:38:08.805386
|
|
||||||
|
|
||||||
"""
|
|
||||||
from typing import Sequence, Union
|
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
|
||||||
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
|
||||||
revision: str = '8a7accc583c9'
|
|
||||||
down_revision: Union[str, None] = 'b2ffa638aafc'
|
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
|
||||||
"""Upgrade schema."""
|
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
|
||||||
op.add_column('buylisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
|
|
||||||
op.add_column('rentlisting', sa.Column('floorplan_image_paths', sa.JSON(), nullable=False))
|
|
||||||
# ### end Alembic commands ###
|
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
|
||||||
"""Downgrade schema."""
|
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
|
||||||
op.drop_column('rentlisting', 'floorplan_image_paths')
|
|
||||||
op.drop_column('buylisting', 'floorplan_image_paths')
|
|
||||||
# ### end Alembic commands ###
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
||||||
"""add more fields to tables
|
|
||||||
|
|
||||||
Revision ID: b2ffa638aafc
|
|
||||||
Revises: b78e1ed31eed
|
|
||||||
Create Date: 2025-06-07 12:18:28.963851
|
|
||||||
|
|
||||||
"""
|
|
||||||
from typing import Sequence, Union
|
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
|
||||||
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
|
||||||
revision: str = 'b2ffa638aafc'
|
|
||||||
down_revision: Union[str, None] = 'b78e1ed31eed'
|
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
|
||||||
"""Upgrade schema."""
|
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
|
||||||
pass
|
|
||||||
# ### end Alembic commands ###
|
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
|
||||||
"""Downgrade schema."""
|
|
||||||
# ### commands auto generated by Alembic - please adjust! ###
|
|
||||||
pass
|
|
||||||
# ### end Alembic commands ###
|
|
||||||
|
|
@ -118,7 +118,6 @@ def listing_filter_options(func):
|
||||||
def cli(ctx, data_dir: str):
|
def cli(ctx, data_dir: str):
|
||||||
ctx.ensure_object(dict)
|
ctx.ensure_object(dict)
|
||||||
ctx.obj["data_dir"] = data_dir
|
ctx.obj["data_dir"] = data_dir
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
|
@ -218,12 +217,14 @@ def routing(
|
||||||
"Please set it to your API key for the routing service."
|
"Please set it to your API key for the routing service."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
repository = ListingRepository(engine=engine)
|
||||||
asyncio.run(
|
asyncio.run(
|
||||||
routing_module.calculate_route(
|
routing_module.calculate_route(
|
||||||
listing_paths,
|
repository,
|
||||||
destination_address,
|
destination_address,
|
||||||
# destination_address_coordinates,
|
# destination_address_coordinates,
|
||||||
TravelMode[travel_mode],
|
TravelMode[travel_mode],
|
||||||
|
limit=limit,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,13 @@
|
||||||
from dataclasses import dataclass
|
from __future__ import annotations
|
||||||
from datetime import datetime
|
from dataclasses import asdict, dataclass
|
||||||
|
import dataclasses
|
||||||
|
from datetime import datetime, timedelta
|
||||||
import enum
|
import enum
|
||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
from sqlmodel import JSON, Column, Enum, SQLModel, Field, String, TypeDecorator
|
from rec import routing
|
||||||
|
from sqlmodel import JSON, SQLModel, Field, String
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -13,6 +17,28 @@ class PriceHistoryItem:
|
||||||
price: float
|
price: float
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class Route:
|
||||||
|
legs: list[RouteLegStep]
|
||||||
|
distance_meters: int
|
||||||
|
duration_s: int
|
||||||
|
|
||||||
|
@property
|
||||||
|
def duration(self) -> timedelta:
|
||||||
|
return timedelta(seconds=self.duration_s)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class RouteLegStep:
|
||||||
|
distance_meters: int
|
||||||
|
duration_s: int
|
||||||
|
travel_mode: routing.TravelMode
|
||||||
|
|
||||||
|
@property
|
||||||
|
def duration(self) -> timedelta:
|
||||||
|
return timedelta(seconds=self.duration_s)
|
||||||
|
|
||||||
|
|
||||||
class ListingSite(enum.StrEnum):
|
class ListingSite(enum.StrEnum):
|
||||||
RIGHTMOVE = "rightmove"
|
RIGHTMOVE = "rightmove"
|
||||||
# ZOOPLA = "zoopla"
|
# ZOOPLA = "zoopla"
|
||||||
|
|
@ -38,6 +64,69 @@ class Listing(SQLModel, table=False):
|
||||||
additional_info: Dict[str, Any] = Field(
|
additional_info: Dict[str, Any] = Field(
|
||||||
default_factory=dict, sa_type=JSON, nullable=False
|
default_factory=dict, sa_type=JSON, nullable=False
|
||||||
)
|
)
|
||||||
|
routing_info_json: str = Field(
|
||||||
|
sa_type=String, nullable=True, default=None
|
||||||
|
) # Store as JSON string for simplicity
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_removed(self) -> bool:
|
||||||
|
return not self.additional_info["property"]["visible"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def routing_info(self) -> dict[DestinationMode, List[Route]]:
|
||||||
|
"""
|
||||||
|
Returns a list of DestinationMode objects from the routing_info_str.
|
||||||
|
"""
|
||||||
|
if not self.routing_info_json:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# TODO: move to a separate serializer class
|
||||||
|
json_data = json.loads(self.routing_info_json)
|
||||||
|
destimation_routes = {}
|
||||||
|
for destination_mode_str, routes_json in json_data.items():
|
||||||
|
destination_mode = DestinationMode(
|
||||||
|
destination_address=json.loads(destination_mode_str)[
|
||||||
|
"destination_address"
|
||||||
|
],
|
||||||
|
travel_mode=routing.TravelMode(
|
||||||
|
json.loads(destination_mode_str)["travel_mode"]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
parsed_route = json.loads(routes_json[0])
|
||||||
|
routes = [
|
||||||
|
Route(
|
||||||
|
legs=[
|
||||||
|
RouteLegStep(
|
||||||
|
distance_meters=step["distance_meters"],
|
||||||
|
duration_s=step["duration_s"],
|
||||||
|
travel_mode=routing.TravelMode(step["travel_mode"]),
|
||||||
|
)
|
||||||
|
for step in parsed_route["legs"]
|
||||||
|
],
|
||||||
|
distance_meters=parsed_route["distance_meters"],
|
||||||
|
duration_s=int(parsed_route["duration_s"]),
|
||||||
|
)
|
||||||
|
]
|
||||||
|
destimation_routes[destination_mode] = routes
|
||||||
|
return destimation_routes
|
||||||
|
|
||||||
|
def serialize_routing_info(
|
||||||
|
self, routing_info: dict[DestinationMode, list[Route]]
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Serializes the routing_info to a JSON string.
|
||||||
|
"""
|
||||||
|
# TODO: move to a separate serializer class
|
||||||
|
# for destination_mode, routes in routing_info.items():
|
||||||
|
serialized = json.dumps(
|
||||||
|
{
|
||||||
|
json.dumps(dataclasses.asdict(destination_mode)): [
|
||||||
|
json.dumps(dataclasses.asdict(route)) for route in routes
|
||||||
|
]
|
||||||
|
for destination_mode, routes in routing_info.items()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return serialized
|
||||||
|
|
||||||
|
|
||||||
class FurnishType(enum.StrEnum):
|
class FurnishType(enum.StrEnum):
|
||||||
|
|
@ -57,3 +146,39 @@ class BuyListing(Listing, table=True):
|
||||||
lease_left: int | None = Field(
|
lease_left: int | None = Field(
|
||||||
default=None, nullable=True
|
default=None, nullable=True
|
||||||
) # in years, e.g., 90, 80, etc.
|
) # in years, e.g., 90, 80, etc.
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class DestinationMode:
|
||||||
|
destination_address: str
|
||||||
|
travel_mode: routing.TravelMode
|
||||||
|
|
||||||
|
def __hash__(self) -> int:
|
||||||
|
return hash((self.destination_address, self.travel_mode))
|
||||||
|
|
||||||
|
# def to_dict(self) -> dict[str, str | routing.TravelMode]:
|
||||||
|
# return {
|
||||||
|
# "destination_address": self.destination_address,
|
||||||
|
# "travel_mode": self.travel_mode.value,
|
||||||
|
# }
|
||||||
|
|
||||||
|
# @classmethod
|
||||||
|
# def from_dict(cls, data: dict):
|
||||||
|
# return cls(
|
||||||
|
# destination_address=data["destination_address"],
|
||||||
|
# travel_mode=routing.TravelMode(data["travel_mode"]),
|
||||||
|
# )
|
||||||
|
|
||||||
|
# def __json__(self) -> dict[str, str | routing.TravelMode]:
|
||||||
|
# return {
|
||||||
|
# "destination_address": self.destination_address,
|
||||||
|
# "travel_mode": self.travel_mode.value,
|
||||||
|
# }
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
# This allows serializers to pick up a dict representation
|
||||||
|
return asdict(self)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
# Makes it behave like a dict when expected
|
||||||
|
return iter(asdict(self).items())
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue