migrate immoweb exporter to use models

This commit is contained in:
Viktor Barzin 2025-06-08 18:18:38 +00:00
parent e317d2ec54
commit 3785d01009
No known key found for this signature in database
GPG key ID: 4056458DBDBF8863
5 changed files with 94 additions and 22 deletions

View file

@ -4,7 +4,7 @@ from dataclasses import dataclass
import json
import pathlib
from typing import Any, List, Dict
from models.listing import ListingSite
from models.listing import ListingSite, PriceHistoryItem
from rec import floorplan, routing
import re
import datetime
@ -381,11 +381,19 @@ class Listing:
return None
@property
def priceHistory(self) -> list[PriceHistoryItem]:
    """Load this listing's price history from its JSON file.

    The file at ``self.path_price_history()`` is read as a JSON array whose
    entries carry ISO-8601 ``first_seen`` / ``last_seen`` strings and a
    ``price`` value.

    Returns:
        One ``PriceHistoryItem`` per stored entry, in file order, or an
        empty list when the history file does not exist.

    Raises:
        KeyError: if an entry lacks one of the three expected keys.
        ValueError: if the file is not valid JSON or a timestamp is not
            in ISO format.
    """
    history_path = self.path_price_history()
    if not history_path.exists():
        return []
    # JSON is UTF-8 by specification, so do not depend on the locale default.
    with open(history_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return [
        PriceHistoryItem(
            first_seen=datetime.datetime.fromisoformat(item["first_seen"]),
            last_seen=datetime.datetime.fromisoformat(item["last_seen"]),
            price=item["price"],
        )
        for item in data
    ]
@property
def longtitude(self) -> float:

View file

@ -329,7 +329,6 @@ def export_immoweb(
last_seen_days: int,
min_sqm: int | None = None,
):
# use model
query_parameters = QueryParameters(
listing_type=ListingType[type],
district_names=set(district),
@ -343,9 +342,10 @@ def export_immoweb(
min_sqm=min_sqm,
)
click.echo(
f"Exporting data to {output_file} that matches the query parameters: {query_parameters}"
f"Exporting data to {output_file} for listings stored in {engine.url} that match the query parameters: {query_parameters}"
)
asyncio.run(export_immoweb_ui(ctx, output_file, query_parameters))
repository = ListingRepository(engine=engine)
asyncio.run(export_immoweb_ui(repository, output_file, query_parameters))
@cli.command()

View file

@ -10,12 +10,19 @@ from rec import routing
from sqlmodel import JSON, SQLModel, Field, String
@dataclass
@dataclass(frozen=True)
class PriceHistoryItem:
first_seen: datetime
last_seen: datetime
price: float
def to_dict(self) -> Dict[str, float | str]:
return {
"first_seen": self.first_seen.isoformat(),
"last_seen": self.last_seen.isoformat(),
"price": self.price,
}
@dataclass(frozen=True)
class Route:
@ -54,7 +61,8 @@ class Listing(SQLModel, table=False):
council_tax_band: str | None = Field(default=None, nullable=True)
longtitude: float = Field(nullable=False)
latitude: float = Field(nullable=False)
price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
# price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
price_history_json: str = Field(sa_type=String)
listing_site: ListingSite = Field(nullable=False)
last_seen: datetime = Field(default_factory=datetime.now, nullable=False)
photo_thumbnail: str | None = Field(default=None, nullable=True)
@ -72,6 +80,56 @@ class Listing(SQLModel, table=False):
def is_removed(self) -> bool:
    """Report whether ``additional_info`` marks the property as not visible."""
    property_info = self.additional_info["property"]
    return not property_info["visible"]
@property
def price_per_square_meter(self) -> float | None:
"""
Returns the price per square meter.
"""
if self.square_meters is None or self.square_meters == 0:
return None
return round(self.price / self.square_meters, 2)
@property
def url(self) -> str:
    """Rightmove property-page URL built from ``self.id``."""
    listing_id = self.id
    return f"https://www.rightmove.co.uk/properties/{listing_id}"
@property
def price_history(self) -> List[PriceHistoryItem]:
    """Decode ``price_history_json`` into ``PriceHistoryItem`` objects.

    Returns:
        One item per entry of the stored JSON array, in stored order, or
        an empty list when ``price_history_json`` is empty or unset.

    Raises:
        KeyError: if an entry lacks ``first_seen``, ``last_seen`` or ``price``.
        ValueError: if the stored string is not valid JSON or a timestamp
            is not in ISO format.
    """
    if not self.price_history_json:
        return []
    # Build the items in a single pass; the decoded dicts are read only,
    # never rewritten in place.
    return [
        PriceHistoryItem(
            first_seen=datetime.fromisoformat(entry["first_seen"]),
            last_seen=datetime.fromisoformat(entry["last_seen"]),
            price=entry["price"],
        )
        for entry in json.loads(str(self.price_history_json))
    ]
@staticmethod
def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
    """Serialize price-history items to a JSON array string.

    Each item is rendered through ``PriceHistoryItem.to_dict`` so the
    stored layout (``first_seen``, ``last_seen``, ``price``) is defined in
    exactly one place.

    Args:
        price_history: items to serialize; an empty list yields ``"[]"``.

    Returns:
        The JSON text of the list of per-item dicts.
    """
    return json.dumps([item.to_dict() for item in price_history])
@property
def routing_info(self) -> dict[DestinationMode, List[Route]]:
"""

View file

@ -2,11 +2,14 @@
set -euxo pipefail

# Directory handed to every main.py step through --data-dir.
DATA_DIR="data/rs/test"
# Filter flags for dump-listings. Kept as a single string and expanded
# unquoted below on purpose, so the shell splits it into separate arguments.
LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished"
#LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 2500 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished --district Islington" # DEBUG: UNCOMMENT ME WHEN TESTING
poetry install
alembic upgrade head # init db
python main.py --data-dir "$DATA_DIR" dump-listings $LISTING_FILTER_OPTIONS
python main.py --data-dir "$DATA_DIR" dump-images
python main.py --data-dir "$DATA_DIR" detect-floorplan

View file

@ -1,23 +1,22 @@
import dataclasses
import json
import pathlib
from data_access import Listing
from rec.query import QueryParameters
from repositories.listing_repository import ListingRepository
async def export_immoweb(
ctx,
repository: ListingRepository,
output_file: str,
query_parameters: QueryParameters | None = None,
):
data_dir = ctx.obj["data_dir"]
output_file_path = pathlib.Path(output_file)
output_file_path.touch(exist_ok=True)
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
# listing_paths = listing_paths[:10]
listings = Listing.get_all_listings([str(path) for path in listing_paths])
if query_parameters is not None:
listings = await filter_listings(listings, query_parameters)
listings = await repository.get_listings(
query_parameters=query_parameters,
)
# Convert listings to immoweb format
immoweb_listings = []
@ -27,18 +26,22 @@ async def export_immoweb(
"properties": {
"city": "London", # change me
"country": "United Kingdom",
"qm": await listing.sqm_ocr(),
"qmprice": round(await listing.price_per_sqm(), 2),
"rooms": listing.bedrooms,
"qm": listing.square_meters,
"qmprice": listing.price_per_square_meter,
"rooms": listing.number_of_bedrooms,
"total_price": listing.price,
"url": listing.url,
"photo_thumbnail": listing.photo_thumbnail,
"last_seen": listing.last_seen.isoformat(),
"price_history": [item.to_dict() for item in listing.price_history],
"agency": listing.agency,
# Additional info; the above is GeoJSON format
# Below is all other crap we want in the UI
"info": await listing.dict_nicely(),
"info": listing.additional_info,
},
"geometry": {
"coordinates": [
listing.longitude,
listing.longtitude,
listing.latitude,
],
"type": "Point",