Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/

The crawler subdirectory was the only active project. Moving it to the
repo root simplifies paths and removes the unnecessary nesting. The
vqa/ and immoweb/ directories were legacy/unused and have been removed.

Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect
the new flat structure.
This commit is contained in:
Viktor Barzin 2026-02-07 23:01:20 +00:00
parent e2247be700
commit eafbc1ac52
No known key found for this signature in database
GPG key ID: 0EB088298288D958
221 changed files with 70 additions and 146140 deletions

5
models/__init__.py Normal file
View file

@ -0,0 +1,5 @@
from models.listing import Listing
from models.user import User
__all__ = ["Listing", "User"]

240
models/listing.py Normal file
View file

@ -0,0 +1,240 @@
from __future__ import annotations
from dataclasses import asdict, dataclass
import dataclasses
from datetime import datetime, timedelta
import enum
import json
from typing import Any, Dict, List
from pydantic import BaseModel, Field as PydanticField, model_validator
from rec import routing
from sqlmodel import JSON, TEXT, SQLModel, Field
@dataclass(frozen=True)
class PriceHistoryItem:
    """One price observed for a listing between two timestamps."""

    first_seen: datetime
    last_seen: datetime
    price: float

    def to_dict(self) -> Dict[str, float | str]:
        """Return a JSON-friendly dict with timestamps in ISO 8601 format."""
        # Timestamps first so the key order stays first_seen, last_seen, price.
        payload: Dict[str, float | str] = {
            name: getattr(self, name).isoformat()
            for name in ("first_seen", "last_seen")
        }
        payload["price"] = self.price
        return payload
@dataclass(frozen=True)
class Route:
    """A route made of leg steps, with its total distance and duration."""

    legs: list[RouteLegStep]
    distance_meters: int
    duration_s: int

    @property
    def duration(self) -> timedelta:
        """The route duration (``duration_s`` seconds) as a ``timedelta``."""
        seconds = self.duration_s
        return timedelta(seconds=seconds)
@dataclass(frozen=True)
class RouteLegStep:
    """A single step of a route leg, travelled with one travel mode."""

    distance_meters: int
    duration_s: int
    travel_mode: routing.TravelMode

    @property
    def duration(self) -> timedelta:
        """The step duration (``duration_s`` seconds) as a ``timedelta``."""
        seconds = self.duration_s
        return timedelta(seconds=seconds)
class ListingSite(enum.StrEnum):
RIGHTMOVE = "rightmove"
# ZOOPLA = "zoopla"
# ... add more
def _parse_price_history(price_history_json: str) -> list[PriceHistoryItem]:
    """Decode a JSON array string into ``PriceHistoryItem`` objects.

    A falsy input (empty string, ``None``) yields an empty list. Each decoded
    entry must provide ISO 8601 ``first_seen`` / ``last_seen`` strings and a
    ``price``.
    """
    if not price_history_json:
        return []

    def build_item(entry) -> PriceHistoryItem:
        # Timestamps are stored as ISO 8601 strings; price is passed through.
        return PriceHistoryItem(
            first_seen=datetime.fromisoformat(entry["first_seen"]),
            last_seen=datetime.fromisoformat(entry["last_seen"]),
            price=entry["price"],
        )

    entries: list = json.loads(str(price_history_json))
    return [build_item(entry) for entry in entries]
class Listing(SQLModel, table=False):
    """Columns and helpers shared by listing models.

    Declared with ``table=False``, so this class itself is not mapped to a
    table. Price history and routing information are stored as JSON strings
    and exposed in decoded form through the ``price_history`` and
    ``routing_info`` properties.
    """

    id: int = Field(primary_key=True)
    price: float = Field(nullable=False, index=True)
    number_of_bedrooms: int = Field(nullable=False, index=True)
    square_meters: float | None = Field(default=None, nullable=True, index=True)
    agency: str | None = Field(default=None, nullable=True)
    council_tax_band: str | None = Field(default=None, nullable=True)
    longitude: float = Field(nullable=False)
    latitude: float = Field(nullable=False)
    # JSON-encoded price history; see serialize_price_history / price_history.
    price_history_json: str = Field(sa_type=TEXT)
    listing_site: ListingSite = Field(nullable=False)
    last_seen: datetime = Field(
        default_factory=datetime.now, nullable=False, index=True
    )
    photo_thumbnail: str | None = Field(default=None, nullable=True)
    floorplan_image_paths: List[str] = Field(
        default_factory=list, sa_type=JSON, nullable=False
    )
    additional_info: Dict[str, Any] = Field(
        default_factory=dict, sa_type=JSON, nullable=False
    )
    # Store as JSON string for simplicity. The column is nullable and defaults
    # to None, so the annotation must admit None as well.
    routing_info_json: str | None = Field(
        sa_type=TEXT, nullable=True, default=None
    )

    @property
    def is_removed(self) -> bool:
        """Whether ``additional_info`` marks the property as not visible.

        Returns ``False`` when ``additional_info`` is empty or carries no
        ``"property"`` data; a missing ``"visible"`` key counts as visible.
        """
        if not self.additional_info:
            return False
        # ``or {}`` also covers a "property" key that is present but null,
        # which would otherwise raise AttributeError on ``.get``.
        property_info = self.additional_info.get("property") or {}
        return not property_info.get("visible", True)

    @property
    def price_per_square_meter(self) -> float | None:
        """Price divided by area, rounded to 2 decimals.

        Returns ``None`` when the area is unknown or zero.
        """
        if not self.square_meters:
            return None
        return round(self.price / self.square_meters, 2)

    @property
    def url(self) -> str:
        """The Rightmove property page URL built from this listing's id."""
        return f"https://www.rightmove.co.uk/properties/{self.id}"

    @property
    def price_history(self) -> List[PriceHistoryItem]:
        """The ``PriceHistoryItem`` objects decoded from ``price_history_json``."""
        return _parse_price_history(self.price_history_json)

    @staticmethod
    def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
        """Serialize price history items to a JSON array string.

        Each item is written via ``PriceHistoryItem.to_dict`` (ISO 8601
        timestamps plus the price), the format ``price_history`` reads back.
        """
        return json.dumps([item.to_dict() for item in price_history])

    @property
    def routing_info(self) -> dict[DestinationMode, List[Route]]:
        """The routes decoded from ``routing_info_json``.

        Returns a dict mapping each ``DestinationMode`` to its list of
        ``Route`` objects, or an empty dict when nothing is stored.
        """
        if not self.routing_info_json:
            return {}
        # NOTE(review): imported lazily, presumably to avoid a circular
        # import with rec.route_serializer — confirm.
        from rec.route_serializer import RouteSerializer

        return RouteSerializer.deserialize(self.routing_info_json)

    def serialize_routing_info(
        self, routing_info: dict[DestinationMode, list[Route]]
    ) -> str:
        """Serialize ``routing_info`` to a string via ``RouteSerializer``.

        The result is returned, not stored on the instance.
        """
        from rec.route_serializer import RouteSerializer

        return RouteSerializer.serialize(routing_info)
class FurnishType(enum.StrEnum):
FURNISHED = "furnished"
UNFURNISHED = "unfurnished"
PART_FURNISHED = "part furnished"
ASK_LANDLORD = "ask landlord"
UNKNOWN = "unknown"
class RentListing(Listing, table=True):
    """Table model for rental listings; adds rent-specific columns to Listing."""

    available_from: datetime | None = Field(default=None, nullable=True)
    # NOTE(review): the annotation admits None but the column is declared
    # nullable=False — confirm which of the two is intended.
    furnish_type: FurnishType | None = Field(nullable=False)
class BuyListing(Listing, table=True):
    """Table model for sale listings; adds purchase-specific columns to Listing."""

    service_charge: float | None = Field(default=None, nullable=True)
    # in years, e.g., 90, 80, etc.
    lease_left: int | None = Field(default=None, nullable=True)
@dataclass(frozen=True)
class DestinationMode:
    """A destination address paired with the travel mode used to reach it."""

    destination_address: str
    travel_mode: routing.TravelMode

    def to_dict(self) -> dict[str, Any]:
        """Return this DestinationMode as a plain dictionary of its fields."""
        return asdict(self)

    def __getstate__(self):
        # Expose a dict representation so serializers can pick it up.
        return asdict(self)

    def __hash__(self) -> int:
        key = (self.destination_address, self.travel_mode)
        return hash(key)
class ListingType(enum.StrEnum):
BUY = "BUY"
RENT = "RENT"
class QueryParameters(BaseModel):
    """Immutable set of filters used when querying listings.

    Price and bedroom ranges are checked after construction; an inverted
    range or a negative bedroom bound raises ``ValueError``.
    """

    model_config = {"frozen": True}

    listing_type: ListingType
    min_bedrooms: int = 1
    max_bedrooms: int = 999
    min_price: int = 0
    max_price: int = 10_000_000
    district_names: set[str] = PydanticField(default_factory=set)
    radius: float = 0
    page_size: int = 500  # items per page
    max_days_since_added: int = 14  # for buy listings
    furnish_types: list[FurnishType] | None = None

    # The values below are not supported by rightmove
    # hence we apply them after fetching
    # available from; council tax
    let_date_available_from: datetime | None = None
    last_seen_days: int | None = None
    min_sqm: int | None = None

    @model_validator(mode="after")
    def _validate_ranges(self) -> QueryParameters:
        """Reject inverted price/bedroom ranges and negative bedroom bounds."""
        # (violated?, message) pairs, checked in order; the first hit raises.
        checks = (
            (
                self.min_price > self.max_price,
                f"min_price ({self.min_price}) must be <= max_price ({self.max_price})",
            ),
            (
                self.min_bedrooms < 0,
                f"min_bedrooms ({self.min_bedrooms}) must be non-negative",
            ),
            (
                self.max_bedrooms < 0,
                f"max_bedrooms ({self.max_bedrooms}) must be non-negative",
            ),
            (
                self.min_bedrooms > self.max_bedrooms,
                f"min_bedrooms ({self.min_bedrooms}) must be <= max_bedrooms ({self.max_bedrooms})",
            ),
        )
        for violated, message in checks:
            if violated:
                raise ValueError(message)
        return self

View file

@ -0,0 +1,13 @@
from datetime import datetime
from sqlmodel import SQLModel, Field
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Replaces ``datetime.utcnow()``, which is deprecated since Python 3.12.
    The tzinfo is stripped so the value keeps the same naive-UTC form that
    ``datetime.utcnow()`` produced.
    """
    # Imported locally so the module-level imports stay untouched.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class PasskeyCredential(SQLModel, table=True):
    """Table model for a passkey credential linked to a user via ``user_id``."""

    id: int | None = Field(default=None, primary_key=True)
    credential_id: str = Field(index=True, unique=True)
    public_key: str
    sign_count: int = Field(default=0)
    transports: str | None = Field(default=None)  # JSON-encoded list
    user_id: int = Field(foreign_key="user.id", index=True)
    # Naive UTC timestamp taken when the row object is created.
    created_at: datetime = Field(default_factory=_utcnow_naive)

11
models/user.py Normal file
View file

@ -0,0 +1,11 @@
from datetime import datetime
from pydantic import EmailStr
from sqlmodel import SQLModel, Field
def _utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Replaces ``datetime.utcnow()``, which is deprecated since Python 3.12.
    The tzinfo is stripped so the value keeps the same naive-UTC form that
    ``datetime.utcnow()`` produced.
    """
    # Imported locally so the module-level imports stay untouched.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class User(SQLModel, table=True):
    """Table model for a user account, identified by a unique email."""

    id: int = Field(primary_key=True)
    email: EmailStr = Field(index=True, unique=True)
    # Optional: a user row may exist without a password value.
    password: str | None = Field(default=None, nullable=True)
    # Naive UTC timestamp taken when the row object is created.
    created_at: datetime = Field(default_factory=_utcnow_naive)