# wrongmove/crawler/models/listing.py

from __future__ import annotations
from dataclasses import asdict, dataclass
import dataclasses
from datetime import datetime, timedelta
2025-06-04 21:09:29 +00:00
import enum
import json
2025-06-04 21:09:29 +00:00
from typing import Any, Dict, List
from pydantic import BaseModel, Field as PydanticField
from rec import routing
from sqlmodel import JSON, TEXT, SQLModel, Field
2025-06-08 18:18:38 +00:00
@dataclass(frozen=True)
class PriceHistoryItem:
    """Immutable record of a price together with its first/last seen timestamps."""

    first_seen: datetime
    last_seen: datetime
    price: float

    def to_dict(self) -> Dict[str, float | str]:
        """Return a JSON-friendly dict; both timestamps become ISO-8601 strings."""
        payload: Dict[str, float | str] = dict(
            first_seen=self.first_seen.isoformat(),
            last_seen=self.last_seen.isoformat(),
            price=self.price,
        )
        return payload
2025-06-04 21:09:29 +00:00
@dataclass(frozen=True)
class Route:
    """Immutable route: its leg steps, a distance in meters and a duration in seconds."""

    legs: list[RouteLegStep]
    distance_meters: int
    duration_s: int

    @property
    def duration(self) -> timedelta:
        """``duration_s`` expressed as a ``timedelta``."""
        as_delta = timedelta(seconds=self.duration_s)
        return as_delta
@dataclass(frozen=True)
class RouteLegStep:
    """Immutable step of a route leg: distance, duration and the travel mode used."""

    distance_meters: int
    duration_s: int
    travel_mode: routing.TravelMode

    @property
    def duration(self) -> timedelta:
        """``duration_s`` expressed as a ``timedelta``."""
        as_delta = timedelta(seconds=self.duration_s)
        return as_delta
2025-06-04 21:09:29 +00:00
class ListingSite(enum.StrEnum):
RIGHTMOVE = "rightmove"
# ZOOPLA = "zoopla"
# ... add more
class Listing(SQLModel, table=False):
    """Columns and helpers shared by the concrete listing tables.

    Price history and routing information are persisted as JSON text
    (``price_history_json`` / ``routing_info_json``) and exposed as typed
    objects through the ``price_history`` and ``routing_info`` properties.
    """

    id: int = Field(primary_key=True)
    price: float = Field(nullable=False, index=True)
    number_of_bedrooms: int = Field(nullable=False, index=True)
    square_meters: float | None = Field(default=None, nullable=True, index=True)
    agency: str | None = Field(default=None, nullable=True)
    council_tax_band: str | None = Field(default=None, nullable=True)
    longitude: float = Field(nullable=False)
    latitude: float = Field(nullable=False)
    # price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
    # JSON text produced by ``serialize_price_history``.
    price_history_json: str = Field(sa_type=TEXT)
    listing_site: ListingSite = Field(nullable=False)
    last_seen: datetime = Field(
        default_factory=datetime.now, nullable=False, index=True
    )
    photo_thumbnail: str | None = Field(default=None, nullable=True)
    floorplan_image_paths: List[str] = Field(
        default_factory=list, sa_type=JSON, nullable=False
    )
    additional_info: Dict[str, Any] = Field(
        default_factory=dict, sa_type=JSON, nullable=False
    )
    # Stored as a JSON string for simplicity (see ``serialize_routing_info``).
    # Annotated as optional because the column is nullable and defaults to None.
    routing_info_json: str | None = Field(
        sa_type=TEXT, nullable=True, default=None
    )

    @property
    def is_removed(self) -> bool:
        """Return True when ``additional_info["property"]["visible"]`` is falsy.

        Missing ``additional_info``, a missing ``"property"`` entry or a
        missing ``"visible"`` key all count as "still visible".
        """
        if not self.additional_info:
            return False
        # ``or {}`` also covers an explicit ``"property": None`` entry.
        property_info = self.additional_info.get("property") or {}
        return not property_info.get("visible", True)

    @property
    def price_per_square_meter(self) -> float | None:
        """Return the price per square meter rounded to 2 decimals.

        Returns None when ``square_meters`` is unknown or zero.
        """
        if not self.square_meters:
            return None
        return round(self.price / self.square_meters, 2)

    @property
    def url(self) -> str:
        """Return the Rightmove property page URL built from ``id``.

        The host is hard-coded to Rightmove regardless of ``listing_site``.
        """
        return f"https://www.rightmove.co.uk/properties/{self.id}"

    @property
    def price_history(self) -> List[PriceHistoryItem]:
        """Return the ``PriceHistoryItem`` objects decoded from ``price_history_json``.

        An empty or missing JSON string yields an empty list. Each stored
        entry must carry ISO-8601 ``first_seen`` / ``last_seen`` strings and
        a ``price``.
        """
        if not self.price_history_json:
            return []
        entries: list = json.loads(str(self.price_history_json))
        return [
            PriceHistoryItem(
                first_seen=datetime.fromisoformat(entry["first_seen"]),
                last_seen=datetime.fromisoformat(entry["last_seen"]),
                price=entry["price"],
            )
            for entry in entries
        ]

    @staticmethod
    def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
        """Serialize the price history to the JSON string read by ``price_history``."""
        # Reuse the item's own dict form so both encoders cannot drift apart.
        return json.dumps([item.to_dict() for item in price_history])

    @property
    def routing_info(self) -> dict[DestinationMode, List[Route]]:
        """Return the routes per destination decoded from ``routing_info_json``.

        The stored value is a JSON object whose keys are JSON-encoded
        ``DestinationMode`` dicts and whose values are lists of JSON-encoded
        ``Route`` dicts, as written by ``serialize_routing_info``. Every
        stored route is decoded; an empty or missing string yields ``{}``.
        """
        if not self.routing_info_json:
            return {}
        # TODO: move to a separate serializer class
        stored = json.loads(self.routing_info_json)
        destination_routes: dict[DestinationMode, List[Route]] = {}
        for destination_mode_str, routes_json in stored.items():
            # The key is itself a JSON document; decode it once.
            destination_fields = json.loads(destination_mode_str)
            destination_mode = DestinationMode(
                destination_address=destination_fields["destination_address"],
                travel_mode=routing.TravelMode(destination_fields["travel_mode"]),
            )
            routes: List[Route] = []
            # Decode every stored route, not just the first one.
            for route_json in routes_json:
                parsed_route = json.loads(route_json)
                routes.append(
                    Route(
                        legs=[
                            RouteLegStep(
                                distance_meters=step["distance_meters"],
                                duration_s=step["duration_s"],
                                travel_mode=routing.TravelMode(step["travel_mode"]),
                            )
                            for step in parsed_route["legs"]
                        ],
                        distance_meters=parsed_route["distance_meters"],
                        duration_s=int(parsed_route["duration_s"]),
                    )
                )
            destination_routes[destination_mode] = routes
        return destination_routes

    def serialize_routing_info(
        self, routing_info: dict[DestinationMode, list[Route]]
    ) -> str:
        """Serialize ``routing_info`` to the JSON string read by ``routing_info``.

        Each ``DestinationMode`` key and each ``Route`` is JSON-encoded on its
        own (via ``dataclasses.asdict``) because JSON object keys must be
        strings; the outer mapping is then encoded once more.
        """
        # TODO: move to a separate serializer class
        encoded: dict[str, list[str]] = {}
        for destination_mode, routes in routing_info.items():
            key = json.dumps(dataclasses.asdict(destination_mode))
            encoded[key] = [json.dumps(dataclasses.asdict(route)) for route in routes]
        return json.dumps(encoded)
2025-06-04 21:09:29 +00:00
class FurnishType(enum.StrEnum):
FURNISHED = "furnished"
UNFURNISHED = "unfurnished"
PART_FURNISHED = "part furnished"
ASK_LANDLORD = "ask landlord"
UNKNOWN = "unknown"
2025-06-04 21:09:29 +00:00
class RentListing(Listing, table=True):
    """Table model for rental listings: ``Listing`` plus rental-only columns."""

    available_from: datetime | None = Field(default=None, nullable=True)
    # NOTE(review): annotated as optional, yet the column is declared
    # non-nullable and has no default -- confirm which one is intended.
    furnish_type: FurnishType | None = Field(nullable=False)
class BuyListing(Listing, table=True):
    """Table model for sale listings: ``Listing`` plus purchase-only columns."""

    service_charge: float | None = Field(default=None, nullable=True)
    # Remaining lease in years, e.g., 90, 80, etc.
    lease_left: int | None = Field(default=None, nullable=True)
@dataclass(frozen=True)
class DestinationMode:
    """Immutable, hashable pair of a destination address and a travel mode."""

    destination_address: str
    travel_mode: routing.TravelMode

    def __hash__(self) -> int:
        """Hash both fields together as a tuple."""
        identity = (self.destination_address, self.travel_mode)
        return hash(identity)

    def __getstate__(self):
        """Return the fields as a plain dict so serializers can pick it up."""
        state = asdict(self)
        return state

    def __iter__(self):
        """Iterate over ``(field_name, value)`` pairs, which lets ``dict(obj)`` work."""
        field_pairs = asdict(self).items()
        return iter(field_pairs)
class ListingType(enum.StrEnum):
BUY = "BUY"
RENT = "RENT"
class QueryParameters(BaseModel):
    """Query parameters for filtering listings."""

    # Instances are immutable once created.
    model_config = {"frozen": True}

    listing_type: ListingType

    # Bedroom and price bounds.
    min_bedrooms: int = 1
    max_bedrooms: int = 999
    min_price: int = 0
    max_price: int = 10_000_000

    district_names: set[str] = PydanticField(default_factory=set)
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int = 14  # for buy listings
    furnish_types: list[FurnishType] | None = None

    # The values below are not supported by rightmove,
    # hence we apply them after fetching
    # (available from; council tax).
    let_date_available_from: datetime | None = None
    last_seen_days: int | None = None
    min_sqm: int | None = None