wrongmove/models/listing.py
Viktor Barzin eafbc1ac52
Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/
The crawler subdirectory was the only active project. Moving it to the
repo root simplifies paths and removes the unnecessary nesting. The
vqa/ and immoweb/ directories were legacy/unused and have been removed.

Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect
the new flat structure.
2026-02-07 23:01:20 +00:00

240 lines
7.4 KiB
Python

from __future__ import annotations
from dataclasses import asdict, dataclass
import dataclasses
from datetime import datetime, timedelta
import enum
import json
from typing import Any, Dict, List
from pydantic import BaseModel, Field as PydanticField, model_validator
from rec import routing
from sqlmodel import JSON, TEXT, SQLModel, Field
@dataclass(frozen=True)
class PriceHistoryItem:
first_seen: datetime
last_seen: datetime
price: float
def to_dict(self) -> Dict[str, float | str]:
return {
"first_seen": self.first_seen.isoformat(),
"last_seen": self.last_seen.isoformat(),
"price": self.price,
}
@dataclass(frozen=True)
class Route:
legs: list[RouteLegStep]
distance_meters: int
duration_s: int
@property
def duration(self) -> timedelta:
return timedelta(seconds=self.duration_s)
@dataclass(frozen=True)
class RouteLegStep:
distance_meters: int
duration_s: int
travel_mode: routing.TravelMode
@property
def duration(self) -> timedelta:
return timedelta(seconds=self.duration_s)
class ListingSite(enum.StrEnum):
RIGHTMOVE = "rightmove"
# ZOOPLA = "zoopla"
# ... add more
def _parse_price_history(price_history_json: str) -> list[PriceHistoryItem]:
"""Parse a JSON string into a list of PriceHistoryItem objects."""
if not price_history_json:
return []
parsed: list = json.loads(str(price_history_json))
return [
PriceHistoryItem(
first_seen=datetime.fromisoformat(item["first_seen"]),
last_seen=datetime.fromisoformat(item["last_seen"]),
price=item["price"],
)
for item in parsed
]
class Listing(SQLModel, table=False):
id: int = Field(primary_key=True)
price: float = Field(nullable=False, index=True)
number_of_bedrooms: int = Field(nullable=False, index=True)
square_meters: float | None = Field(default=None, nullable=True, index=True)
agency: str | None = Field(default=None, nullable=True)
council_tax_band: str | None = Field(default=None, nullable=True)
longitude: float = Field(nullable=False)
latitude: float = Field(nullable=False)
price_history_json: str = Field(sa_type=TEXT)
listing_site: ListingSite = Field(nullable=False)
last_seen: datetime = Field(
default_factory=datetime.now, nullable=False, index=True
)
photo_thumbnail: str | None = Field(default=None, nullable=True)
floorplan_image_paths: List[str] = Field(
default_factory=list, sa_type=JSON, nullable=False
)
additional_info: Dict[str, Any] = Field(
default_factory=dict, sa_type=JSON, nullable=False
)
routing_info_json: str = Field(
sa_type=TEXT, nullable=True, default=None
) # Store as JSON string for simplicity
@property
def is_removed(self) -> bool:
if not self.additional_info:
return False
property_info = self.additional_info.get("property", {})
return not property_info.get("visible", True)
@property
def price_per_square_meter(self) -> float | None:
"""
Returns the price per square meter.
"""
if self.square_meters is None or self.square_meters == 0:
return None
return round(self.price / self.square_meters, 2)
@property
def url(self):
return f"https://www.rightmove.co.uk/properties/{self.id}"
@property
def price_history(self) -> List[PriceHistoryItem]:
"""
Returns a list of PriceHistoryItem objects from the price_history_json.
"""
return _parse_price_history(self.price_history_json)
@staticmethod
def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
"""
Serializes the price history to a JSON string.
"""
serialized = json.dumps(
[
{
"first_seen": item.first_seen.isoformat(),
"last_seen": item.last_seen.isoformat(),
"price": item.price,
}
for item in price_history
]
)
return serialized
@property
def routing_info(self) -> dict[DestinationMode, List[Route]]:
"""
Returns a list of DestinationMode objects from the routing_info_str.
"""
if not self.routing_info_json:
return {}
from rec.route_serializer import RouteSerializer
return RouteSerializer.deserialize(self.routing_info_json)
def serialize_routing_info(
self, routing_info: dict[DestinationMode, list[Route]]
) -> str:
"""
Serializes the routing_info to a JSON string.
"""
from rec.route_serializer import RouteSerializer
return RouteSerializer.serialize(routing_info)
class FurnishType(enum.StrEnum):
FURNISHED = "furnished"
UNFURNISHED = "unfurnished"
PART_FURNISHED = "part furnished"
ASK_LANDLORD = "ask landlord"
UNKNOWN = "unknown"
class RentListing(Listing, table=True):
available_from: datetime | None = Field(default=None, nullable=True)
furnish_type: FurnishType | None = Field(nullable=False)
class BuyListing(Listing, table=True):
service_charge: float | None = Field(default=None, nullable=True)
lease_left: int | None = Field(
default=None, nullable=True
) # in years, e.g., 90, 80, etc.
@dataclass(frozen=True)
class DestinationMode:
destination_address: str
travel_mode: routing.TravelMode
def __hash__(self) -> int:
return hash((self.destination_address, self.travel_mode))
def __getstate__(self):
# This allows serializers to pick up a dict representation
return asdict(self)
def to_dict(self) -> dict[str, Any]:
"""Return a dictionary representation of this DestinationMode."""
return asdict(self)
class ListingType(enum.StrEnum):
BUY = "BUY"
RENT = "RENT"
class QueryParameters(BaseModel):
"""Query parameters for filtering listings."""
model_config = {"frozen": True}
listing_type: ListingType
min_bedrooms: int = 1
max_bedrooms: int = 999
min_price: int = 0
max_price: int = 10_000_000
district_names: set[str] = PydanticField(default_factory=set)
radius: float = 0
page_size: int = 500 # items per page
max_days_since_added: int = 14 # for buy listings
furnish_types: list[FurnishType] | None = None
# The values below are not supported by rightmove
# hence we apply them after fetching
# available from; council tax
let_date_available_from: datetime | None = None
last_seen_days: int | None = None
min_sqm: int | None = None
@model_validator(mode="after")
def _validate_ranges(self) -> QueryParameters:
if self.min_price > self.max_price:
raise ValueError(
f"min_price ({self.min_price}) must be <= max_price ({self.max_price})"
)
if self.min_bedrooms < 0:
raise ValueError(
f"min_bedrooms ({self.min_bedrooms}) must be non-negative"
)
if self.max_bedrooms < 0:
raise ValueError(
f"max_bedrooms ({self.max_bedrooms}) must be non-negative"
)
if self.min_bedrooms > self.max_bedrooms:
raise ValueError(
f"min_bedrooms ({self.min_bedrooms}) must be <= max_bedrooms ({self.max_bedrooms})"
)
return self