wrongmove/crawler/models/listing.py
Kadir b1e0a414cf Used ruff to cleanup
I hope it just works right as I cannot test things if they work
2025-09-14 19:02:30 +01:00

251 lines
7.9 KiB
Python

from __future__ import annotations

import dataclasses
import enum
import json
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List

from pydantic import BaseModel, ConfigDict
from pydantic import Field as PydanticField
from sqlmodel import JSON, TEXT, Field, SQLModel

from rec import routing
@dataclass(frozen=True)
class PriceHistoryItem:
    """Immutable record of a price and the first/last time it was seen."""

    first_seen: datetime
    last_seen: datetime
    price: float

    def to_dict(self) -> Dict[str, float | str]:
        """Return a JSON-ready mapping; both timestamps become ISO strings."""
        first_seen_iso = self.first_seen.isoformat()
        last_seen_iso = self.last_seen.isoformat()
        return dict(
            first_seen=first_seen_iso,
            last_seen=last_seen_iso,
            price=self.price,
        )
@dataclass(frozen=True)
class Route:
    """A route made of legs, with a distance in metres and a duration in seconds."""

    legs: list[RouteLegStep]
    distance_meters: int
    duration_s: int

    @property
    def duration(self) -> timedelta:
        """``duration_s`` expressed as a ``timedelta``."""
        seconds = self.duration_s
        return timedelta(seconds=seconds)
@dataclass(frozen=True)
class RouteLegStep:
    """A single step of a route leg, travelled with one travel mode."""

    distance_meters: int
    duration_s: int
    travel_mode: routing.TravelMode

    @property
    def duration(self) -> timedelta:
        """``duration_s`` expressed as a ``timedelta``."""
        seconds = self.duration_s
        return timedelta(seconds=seconds)
class ListingSite(enum.StrEnum):
RIGHTMOVE = "rightmove"
# ZOOPLA = "zoopla"
# ... add more
class Listing(SQLModel, table=False):
    """Fields and helpers shared by the concrete listing tables.

    Price history and routing data are stored as JSON text
    (``price_history_json`` / ``routing_info_json``) and exposed as typed
    objects through the ``price_history`` and ``routing_info`` properties.
    """

    id: int = Field(primary_key=True)
    price: float = Field(nullable=False, index=True)
    number_of_bedrooms: int = Field(nullable=False, index=True)
    square_meters: float | None = Field(default=None, nullable=True, index=True)
    agency: str | None = Field(default=None, nullable=True)
    council_tax_band: str | None = Field(default=None, nullable=True)
    # NOTE: "longtitude" is a misspelling of "longitude"; it is kept because
    # renaming the attribute would change the model's public field name.
    longtitude: float = Field(nullable=False)
    latitude: float = Field(nullable=False)
    # price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
    price_history_json: str = Field(sa_type=TEXT)
    listing_site: ListingSite = Field(nullable=False)
    last_seen: datetime = Field(
        default_factory=datetime.now, nullable=False, index=True
    )
    photo_thumbnail: str | None = Field(default=None, nullable=True)
    floorplan_image_paths: List[str] = Field(
        default_factory=list, sa_type=JSON, nullable=False
    )
    additional_info: Dict[str, Any] = Field(
        default_factory=dict, sa_type=JSON, nullable=False
    )
    # Stored as a JSON string for simplicity. The field defaults to None and
    # is nullable, so the annotation says so too.
    routing_info_json: str | None = Field(
        sa_type=TEXT, nullable=True, default=None
    )

    @property
    def is_removed(self) -> bool:
        """Whether ``additional_info["property"]["visible"]`` is falsy.

        Raises:
            KeyError: if ``additional_info`` has no ``"property"`` entry or
                that entry has no ``"visible"`` key.
        """
        return not self.additional_info["property"]["visible"]

    @property
    def price_per_square_meter(self) -> float | None:
        """Price divided by area, rounded to 2 decimals.

        Returns ``None`` when the area is unknown or zero.
        """
        if not self.square_meters:
            return None
        return round(self.price / self.square_meters, 2)

    @property
    def url(self):
        """Rightmove property URL built from ``id``.

        NOTE: the host is hard-coded and does not depend on ``listing_site``.
        """
        return f"https://www.rightmove.co.uk/properties/{self.id}"

    @property
    def price_history(self) -> List[PriceHistoryItem]:
        """Decode ``price_history_json`` into ``PriceHistoryItem`` objects.

        Returns an empty list when no history is stored.
        """
        if not self.price_history_json:
            return []
        entries: list = json.loads(str(self.price_history_json))
        return [
            PriceHistoryItem(
                first_seen=datetime.fromisoformat(entry["first_seen"]),
                last_seen=datetime.fromisoformat(entry["last_seen"]),
                price=entry["price"],
            )
            for entry in entries
        ]

    @staticmethod
    def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
        """Encode price history items as the JSON read by ``price_history``."""
        # PriceHistoryItem.to_dict() already yields the stored shape:
        # ISO-formatted first_seen / last_seen plus the price.
        return json.dumps([item.to_dict() for item in price_history])

    @property
    def routing_info(self) -> dict[DestinationMode, List[Route]]:
        """Decode ``routing_info_json`` into routes keyed by ``DestinationMode``.

        Every stored route is decoded, mirroring what
        ``serialize_routing_info`` writes. Returns an empty dict when no
        routing info is stored.
        """
        if not self.routing_info_json:
            return {}
        # TODO: move to a separate serializer class
        routes_by_destination: dict[DestinationMode, List[Route]] = {}
        stored = json.loads(self.routing_info_json)
        for destination_mode_json, routes_json in stored.items():
            # Each key is itself a JSON-encoded DestinationMode.
            key_fields = json.loads(destination_mode_json)
            destination_mode = DestinationMode(
                destination_address=key_fields["destination_address"],
                travel_mode=routing.TravelMode(key_fields["travel_mode"]),
            )
            routes: List[Route] = []
            # Each list element is a JSON-encoded Route.
            for route_json in routes_json:
                route_fields = json.loads(route_json)
                legs = [
                    RouteLegStep(
                        distance_meters=step["distance_meters"],
                        duration_s=step["duration_s"],
                        travel_mode=routing.TravelMode(step["travel_mode"]),
                    )
                    for step in route_fields["legs"]
                ]
                routes.append(
                    Route(
                        legs=legs,
                        distance_meters=route_fields["distance_meters"],
                        duration_s=int(route_fields["duration_s"]),
                    )
                )
            routes_by_destination[destination_mode] = routes
        return routes_by_destination

    def serialize_routing_info(
        self, routing_info: dict[DestinationMode, list[Route]]
    ) -> str:
        """Encode routing info as the JSON string read by ``routing_info``.

        Keys and routes are each JSON-encoded on their own before the outer
        object is encoded, so every key and list element is a JSON string.
        """
        # TODO: move to a separate serializer class
        encoded = {
            json.dumps(dataclasses.asdict(destination_mode)): [
                json.dumps(dataclasses.asdict(route)) for route in routes
            ]
            for destination_mode, routes in routing_info.items()
        }
        return json.dumps(encoded)
class FurnishType(enum.StrEnum):
FURNISHED = "furnished"
UNFURNISHED = "unfurnished"
PART_FURNISHED = "part furnished"
ASK_LANDLORD = "ask landlord"
UNKNOWN = "unknown"
class RentListing(Listing, table=True):
    """Table model for rental listings; adds rent-only fields to ``Listing``."""

    available_from: datetime | None = Field(default=None, nullable=True)
    # NOTE(review): the annotation allows None but the field is declared
    # nullable=False and has no default - confirm which one is intended.
    furnish_type: FurnishType | None = Field(nullable=False)
class BuyListing(Listing, table=True):
    """Table model for sale listings; adds purchase-only fields to ``Listing``."""

    service_charge: float | None = Field(default=None, nullable=True)
    lease_left: int | None = Field(
        default=None, nullable=True
    )  # in years, e.g., 90, 80, etc.
@dataclass(frozen=True)
class DestinationMode:
    """Hashable pairing of a destination address with a travel mode."""

    destination_address: str
    travel_mode: routing.TravelMode

    def __hash__(self) -> int:
        """Hash the (address, travel mode) pair."""
        key = (self.destination_address, self.travel_mode)
        return hash(key)

    def __getstate__(self):
        # Hand serializers a plain dict of the fields.
        state = asdict(self)
        return state

    def __iter__(self):
        # Iterate (field name, value) pairs so the object can be consumed
        # where a dict-like iterable of items is expected.
        field_pairs = asdict(self).items()
        return iter(field_pairs)
class ListingType(enum.StrEnum):
BUY = "BUY"
RENT = "RENT"
class QueryParameters(BaseModel):
    """Immutable search criteria for a listing query.

    Only ``listing_type`` is required; every other field has a default.
    """

    # Immutability is configured through pydantic. Stacking
    # ``@dataclass(frozen=True)`` on a BaseModel swaps pydantic's ``__init__``
    # for a dataclass one that skips validation and, because pydantic removes
    # field defaults from the class namespace, treats every field as required.
    model_config = ConfigDict(frozen=True)

    listing_type: ListingType
    min_bedrooms: int = 1
    max_bedrooms: int = 999
    min_price: int = 0
    max_price: int = 10_000_000
    # pydantic's own Field: each instance gets a fresh empty set.
    district_names: set[str] = PydanticField(default_factory=set)
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int = 14  # for buy listings
    furnish_types: list[FurnishType] | None = None
    # The values below are not supported by rightmove,
    # hence we apply them after fetching
    # (available from; council tax).
    let_date_available_from: datetime | None = None
    last_seen_days: int | None = None
    min_sqm: int | None = None