Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/
The crawler subdirectory was the only active project. Moving it to the repo root simplifies paths and removes the unnecessary nesting. The vqa/ and immoweb/ directories were legacy/unused and have been removed. Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect the new flat structure.
This commit is contained in:
parent
e2247be700
commit
eafbc1ac52
221 changed files with 70 additions and 146140 deletions
5
models/__init__.py
Normal file
5
models/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
from models.listing import Listing
|
||||
from models.user import User
|
||||
|
||||
|
||||
__all__ = ["Listing", "User"]
|
||||
240
models/listing.py
Normal file
240
models/listing.py
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import asdict, dataclass
|
||||
import dataclasses
|
||||
from datetime import datetime, timedelta
|
||||
import enum
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
from pydantic import BaseModel, Field as PydanticField, model_validator
|
||||
from rec import routing
|
||||
from sqlmodel import JSON, TEXT, SQLModel, Field
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PriceHistoryItem:
    """Immutable record of a price together with two observation timestamps."""

    first_seen: datetime
    last_seen: datetime
    price: float

    def to_dict(self) -> Dict[str, float | str]:
        """Return a plain mapping of this item.

        Both timestamps are rendered with ``datetime.isoformat``; the price is
        passed through unchanged.
        """
        first_iso = self.first_seen.isoformat()
        last_iso = self.last_seen.isoformat()
        return dict(first_seen=first_iso, last_seen=last_iso, price=self.price)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Route:
    """Immutable route: a list of leg steps plus a distance and a duration."""

    legs: list[RouteLegStep]
    distance_meters: int
    duration_s: int

    @property
    def duration(self) -> timedelta:
        """Return ``duration_s`` (interpreted as seconds) as a ``timedelta``."""
        seconds = self.duration_s
        return timedelta(seconds=seconds)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RouteLegStep:
    """Immutable step of a route leg: distance, duration and travel mode."""

    distance_meters: int
    duration_s: int
    travel_mode: routing.TravelMode

    @property
    def duration(self) -> timedelta:
        """Return ``duration_s`` (interpreted as seconds) as a ``timedelta``."""
        seconds = self.duration_s
        return timedelta(seconds=seconds)
|
||||
|
||||
|
||||
# String-valued enum of the listing sites a listing can come from.
# Only RIGHTMOVE is currently defined; the commented-out entry below is a
# placeholder for further sites.
class ListingSite(enum.StrEnum):
    RIGHTMOVE = "rightmove"
    # ZOOPLA = "zoopla"
    # ... add more
|
||||
|
||||
|
||||
def _parse_price_history(price_history_json: str) -> list[PriceHistoryItem]:
|
||||
"""Parse a JSON string into a list of PriceHistoryItem objects."""
|
||||
if not price_history_json:
|
||||
return []
|
||||
parsed: list = json.loads(str(price_history_json))
|
||||
return [
|
||||
PriceHistoryItem(
|
||||
first_seen=datetime.fromisoformat(item["first_seen"]),
|
||||
last_seen=datetime.fromisoformat(item["last_seen"]),
|
||||
price=item["price"],
|
||||
)
|
||||
for item in parsed
|
||||
]
|
||||
|
||||
|
||||
class Listing(SQLModel, table=False):
    """Common fields and helpers shared by the concrete listing models.

    Declared with ``table=False``; the subclasses in this module that pass
    ``table=True`` inherit these fields.
    """

    id: int = Field(primary_key=True)
    price: float = Field(nullable=False, index=True)
    number_of_bedrooms: int = Field(nullable=False, index=True)
    square_meters: float | None = Field(default=None, nullable=True, index=True)
    agency: str | None = Field(default=None, nullable=True)
    council_tax_band: str | None = Field(default=None, nullable=True)
    longitude: float = Field(nullable=False)
    latitude: float = Field(nullable=False)
    # JSON-encoded price history; read it through the ``price_history`` property.
    price_history_json: str = Field(sa_type=TEXT)
    listing_site: ListingSite = Field(nullable=False)
    # NOTE(review): ``datetime.now`` produces a naive local-time value —
    # confirm that is intended.
    last_seen: datetime = Field(
        default_factory=datetime.now, nullable=False, index=True
    )
    photo_thumbnail: str | None = Field(default=None, nullable=True)
    floorplan_image_paths: List[str] = Field(
        default_factory=list, sa_type=JSON, nullable=False
    )
    additional_info: Dict[str, Any] = Field(
        default_factory=dict, sa_type=JSON, nullable=False
    )
    # Stored as a JSON string for simplicity. The column is nullable and
    # defaults to None, so the annotation is optional as well.
    routing_info_json: str | None = Field(
        sa_type=TEXT, nullable=True, default=None
    )

    @property
    def is_removed(self) -> bool:
        """Return True when ``additional_info["property"]["visible"]`` is falsy.

        Missing ``additional_info``, a missing or empty ``"property"`` entry,
        or a missing ``"visible"`` key all count as "not removed".
        """
        if not self.additional_info:
            return False
        # ``or {}`` also covers an explicit null "property" value, which would
        # otherwise raise AttributeError on ``.get``.
        property_info = self.additional_info.get("property") or {}
        return not property_info.get("visible", True)

    @property
    def price_per_square_meter(self) -> float | None:
        """Return ``price / square_meters`` rounded to 2 decimals.

        Returns None when ``square_meters`` is None or zero.
        """
        if self.square_meters is None or self.square_meters == 0:
            return None
        return round(self.price / self.square_meters, 2)

    @property
    def url(self) -> str:
        """Return the Rightmove property URL built from ``id``.

        NOTE(review): the host is hard-coded and ``listing_site`` is not
        consulted; revisit if further sites are added.
        """
        return f"https://www.rightmove.co.uk/properties/{self.id}"

    @property
    def price_history(self) -> List[PriceHistoryItem]:
        """Return the ``PriceHistoryItem`` objects decoded from ``price_history_json``."""
        return _parse_price_history(self.price_history_json)

    @staticmethod
    def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
        """Serialize a price history to a JSON string.

        Each item is converted with ``PriceHistoryItem.to_dict`` so the
        on-disk shape is defined in exactly one place.
        """
        return json.dumps([item.to_dict() for item in price_history])

    @property
    def routing_info(self) -> dict[DestinationMode, List[Route]]:
        """Return the routes decoded from ``routing_info_json``.

        The result maps each ``DestinationMode`` to its list of ``Route``
        objects, as produced by ``RouteSerializer.deserialize``. An empty or
        missing ``routing_info_json`` yields an empty dict.
        """
        if not self.routing_info_json:
            return {}
        # Imported at call time rather than at module import time.
        from rec.route_serializer import RouteSerializer

        return RouteSerializer.deserialize(self.routing_info_json)

    def serialize_routing_info(
        self, routing_info: dict[DestinationMode, list[Route]]
    ) -> str:
        """Serialize ``routing_info`` to a JSON string via ``RouteSerializer.serialize``.

        The result is returned to the caller; this method does not assign it
        to ``routing_info_json``.
        """
        # Imported at call time rather than at module import time.
        from rec.route_serializer import RouteSerializer

        return RouteSerializer.serialize(routing_info)
|
||||
|
||||
|
||||
# String-valued enum of furnishing states for a listing.
# Note that the multi-word values contain a space, not an underscore.
class FurnishType(enum.StrEnum):
    FURNISHED = "furnished"
    UNFURNISHED = "unfurnished"
    PART_FURNISHED = "part furnished"
    ASK_LANDLORD = "ask landlord"
    UNKNOWN = "unknown"
|
||||
|
||||
|
||||
# Table model for rental listings: the shared Listing fields plus the two
# rental-specific columns below.
class RentListing(Listing, table=True):
    available_from: datetime | None = Field(default=None, nullable=True)
    # NOTE(review): the annotation permits None while the column is declared
    # nullable=False — confirm which of the two is intended.
    furnish_type: FurnishType | None = Field(nullable=False)
|
||||
|
||||
|
||||
# Table model for sale listings: the shared Listing fields plus the two
# optional sale-specific columns below.
class BuyListing(Listing, table=True):
    service_charge: float | None = Field(default=None, nullable=True)
    lease_left: int | None = Field(
        default=None, nullable=True
    )  # in years, e.g., 90, 80, etc.
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DestinationMode:
    """Immutable, hashable pairing of a destination address and a travel mode."""

    destination_address: str
    travel_mode: routing.TravelMode

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dictionary produced by ``dataclasses.asdict``."""
        as_mapping = asdict(self)
        return as_mapping

    def __getstate__(self):
        # Hand serializers a dict representation of the instance.
        state = asdict(self)
        return state

    def __hash__(self) -> int:
        """Hash the ``(destination_address, travel_mode)`` pair."""
        key = (self.destination_address, self.travel_mode)
        return hash(key)
|
||||
|
||||
|
||||
# String-valued enum distinguishing sale ("BUY") from rental ("RENT") queries.
# Unlike the other enums in this module, the values here are upper-case.
class ListingType(enum.StrEnum):
    BUY = "BUY"
    RENT = "RENT"
|
||||
|
||||
|
||||
class QueryParameters(BaseModel):
    """Query parameters for filtering listings."""

    # Instances are frozen (see ``model_config``); price and bedroom ranges are
    # checked by ``_validate_ranges`` once the model has been built.
    model_config = {"frozen": True}

    listing_type: ListingType
    min_bedrooms: int = 1
    max_bedrooms: int = 999
    min_price: int = 0
    max_price: int = 10_000_000
    district_names: set[str] = PydanticField(default_factory=set)
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int = 14  # for buy listings
    furnish_types: list[FurnishType] | None = None
    # The values below are not supported by rightmove
    # hence we apply them after fetching
    # available from; council tax
    let_date_available_from: datetime | None = None
    last_seen_days: int | None = None
    min_sqm: int | None = None

    @model_validator(mode="after")
    def _validate_ranges(self) -> QueryParameters:
        # Ordered (failed, message) pairs: the first failing check raises, in
        # the order price range, negative min/max bedrooms, bedroom range.
        checks = (
            (
                self.min_price > self.max_price,
                f"min_price ({self.min_price}) must be <= max_price ({self.max_price})",
            ),
            (
                self.min_bedrooms < 0,
                f"min_bedrooms ({self.min_bedrooms}) must be non-negative",
            ),
            (
                self.max_bedrooms < 0,
                f"max_bedrooms ({self.max_bedrooms}) must be non-negative",
            ),
            (
                self.min_bedrooms > self.max_bedrooms,
                f"min_bedrooms ({self.min_bedrooms}) must be <= max_bedrooms ({self.max_bedrooms})",
            ),
        )
        for failed, message in checks:
            if failed:
                raise ValueError(message)
        return self
|
||||
13
models/passkey_credential.py
Normal file
13
models/passkey_credential.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from datetime import datetime
|
||||
|
||||
from sqlmodel import SQLModel, Field
|
||||
|
||||
|
||||
def _naive_utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same value ``datetime.utcnow()`` did, without calling that
    method, which is deprecated since Python 3.12. The tzinfo is stripped so
    stored values stay naive, exactly as before.
    """
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class PasskeyCredential(SQLModel, table=True):
    """Table model for a passkey credential belonging to a user."""

    id: int | None = Field(default=None, primary_key=True)
    credential_id: str = Field(index=True, unique=True)
    public_key: str
    sign_count: int = Field(default=0)
    transports: str | None = Field(default=None)  # JSON-encoded list
    # References the ``id`` column of the ``user`` table.
    user_id: int = Field(foreign_key="user.id", index=True)
    # Naive UTC timestamp, filled in when no value is supplied.
    created_at: datetime = Field(default_factory=_naive_utcnow)
|
||||
11
models/user.py
Normal file
11
models/user.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
from datetime import datetime
|
||||
|
||||
from pydantic import EmailStr
|
||||
from sqlmodel import SQLModel, Field
|
||||
|
||||
|
||||
def _naive_utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Produces the same value ``datetime.utcnow()`` did, without calling that
    method, which is deprecated since Python 3.12. The tzinfo is stripped so
    stored values stay naive, exactly as before.
    """
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class User(SQLModel, table=True):
    """Table model for a user account, identified by a unique e-mail address."""

    id: int = Field(primary_key=True)
    email: EmailStr = Field(index=True, unique=True)
    # Optional and nullable.
    # NOTE(review): presumably holds a password hash rather than plaintext —
    # confirm against the code that writes it.
    password: str | None = Field(default=None, nullable=True)
    # Naive UTC timestamp, filled in when no value is supplied.
    created_at: datetime = Field(default_factory=_naive_utcnow)
|
||||
Loading…
Add table
Add a link
Reference in a new issue