migrate immoweb exporter to use models
This commit is contained in:
parent
e317d2ec54
commit
3785d01009
5 changed files with 94 additions and 22 deletions
|
|
@ -4,7 +4,7 @@ from dataclasses import dataclass
|
|||
import json
|
||||
import pathlib
|
||||
from typing import Any, List, Dict
|
||||
from models.listing import ListingSite
|
||||
from models.listing import ListingSite, PriceHistoryItem
|
||||
from rec import floorplan, routing
|
||||
import re
|
||||
import datetime
|
||||
|
|
@ -381,11 +381,19 @@ class Listing:
|
|||
return None
|
||||
|
||||
@property
|
||||
def priceHistory(self) -> list[dict[str, Any]]:
|
||||
def priceHistory(self) -> list[PriceHistoryItem]:
|
||||
if not self.path_price_history().exists():
|
||||
return []
|
||||
with open(self.path_price_history(), "r") as f:
|
||||
return json.load(f)
|
||||
data = json.load(f)
|
||||
return [
|
||||
PriceHistoryItem(
|
||||
first_seen=datetime.datetime.fromisoformat(item["first_seen"]),
|
||||
last_seen=datetime.datetime.fromisoformat(item["last_seen"]),
|
||||
price=item["price"],
|
||||
)
|
||||
for item in data
|
||||
]
|
||||
|
||||
@property
|
||||
def longtitude(self) -> float:
|
||||
|
|
|
|||
|
|
@ -329,7 +329,6 @@ def export_immoweb(
|
|||
last_seen_days: int,
|
||||
min_sqm: int | None = None,
|
||||
):
|
||||
# use model
|
||||
query_parameters = QueryParameters(
|
||||
listing_type=ListingType[type],
|
||||
district_names=set(district),
|
||||
|
|
@ -343,9 +342,10 @@ def export_immoweb(
|
|||
min_sqm=min_sqm,
|
||||
)
|
||||
click.echo(
|
||||
f"Exporting data to {output_file} that matches the query parameters: {query_parameters}"
|
||||
f"Exporting data to {output_file} for listings stored in {engine.url} that match the query parameters: {query_parameters}"
|
||||
)
|
||||
asyncio.run(export_immoweb_ui(ctx, output_file, query_parameters))
|
||||
repository = ListingRepository(engine=engine)
|
||||
asyncio.run(export_immoweb_ui(repository, output_file, query_parameters))
|
||||
|
||||
|
||||
@cli.command()
|
||||
|
|
|
|||
|
|
@ -10,12 +10,19 @@ from rec import routing
|
|||
from sqlmodel import JSON, SQLModel, Field, String
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(frozen=True)
|
||||
class PriceHistoryItem:
|
||||
first_seen: datetime
|
||||
last_seen: datetime
|
||||
price: float
|
||||
|
||||
def to_dict(self) -> Dict[str, float | str]:
|
||||
return {
|
||||
"first_seen": self.first_seen.isoformat(),
|
||||
"last_seen": self.last_seen.isoformat(),
|
||||
"price": self.price,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Route:
|
||||
|
|
@ -54,7 +61,8 @@ class Listing(SQLModel, table=False):
|
|||
council_tax_band: str | None = Field(default=None, nullable=True)
|
||||
longtitude: float = Field(nullable=False)
|
||||
latitude: float = Field(nullable=False)
|
||||
price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
|
||||
# price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
|
||||
price_history_json: str = Field(sa_type=String)
|
||||
listing_site: ListingSite = Field(nullable=False)
|
||||
last_seen: datetime = Field(default_factory=datetime.now, nullable=False)
|
||||
photo_thumbnail: str | None = Field(default=None, nullable=True)
|
||||
|
|
@ -72,6 +80,56 @@ class Listing(SQLModel, table=False):
|
|||
def is_removed(self) -> bool:
|
||||
return not self.additional_info["property"]["visible"]
|
||||
|
||||
@property
|
||||
def price_per_square_meter(self) -> float | None:
|
||||
"""
|
||||
Returns the price per square meter.
|
||||
"""
|
||||
if self.square_meters is None or self.square_meters == 0:
|
||||
return None
|
||||
return round(self.price / self.square_meters, 2)
|
||||
|
||||
@property
|
||||
def url(self) -> str:
    """Return the Rightmove property page URL built from this listing's ``id``."""
    return f"https://www.rightmove.co.uk/properties/{self.id}"
|
||||
|
||||
@property
|
||||
def price_history(self) -> List[PriceHistoryItem]:
    """
    Decode ``price_history_json`` into a list of PriceHistoryItem objects.

    Returns an empty list when ``price_history_json`` is empty or unset.
    Each decoded entry must provide ``first_seen`` and ``last_seen`` as
    ISO-8601 strings (parsed with ``datetime.fromisoformat``) and a
    ``price`` value, which is passed through unchanged.

    Raises:
        json.JSONDecodeError: if the stored string is not valid JSON.
        KeyError: if an entry lacks one of the three expected keys.
        ValueError: if a timestamp is not a valid ISO-8601 string.
    """
    if not self.price_history_json:
        return []
    # Build the items in a single pass instead of first rewriting the
    # decoded dicts in place and then copying them field by field.
    return [
        PriceHistoryItem(
            first_seen=datetime.fromisoformat(entry["first_seen"]),
            last_seen=datetime.fromisoformat(entry["last_seen"]),
            price=entry["price"],
        )
        for entry in json.loads(str(self.price_history_json))
    ]
|
||||
|
||||
@staticmethod
|
||||
def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
    """
    Encode a price history as a JSON array string.

    Each item becomes an object with ``first_seen`` and ``last_seen``
    rendered via ``isoformat()`` and ``price`` passed through unchanged.
    """
    rows = []
    for entry in price_history:
        rows.append(
            {
                "first_seen": entry.first_seen.isoformat(),
                "last_seen": entry.last_seen.isoformat(),
                "price": entry.price,
            }
        )
    return json.dumps(rows)
|
||||
|
||||
@property
|
||||
def routing_info(self) -> dict[DestinationMode, List[Route]]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -2,11 +2,14 @@
|
|||
|
||||
set -euxo pipefail
|
||||
|
||||
DATA_DIR="data/rs"
|
||||
DATA_DIR="data/rs/test"
|
||||
|
||||
LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished"
|
||||
#LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 2500 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished --district Islington" # DEBUG: UNCOMMENT ME WHEN TESTING
|
||||
|
||||
|
||||
poetry install
|
||||
alembic upgrade head # init db
|
||||
|
||||
python main.py --data-dir $DATA_DIR dump-listings $LISTING_FILTER_OPTIONS
|
||||
python main.py --data-dir $DATA_DIR dump-images
|
||||
python main.py --data-dir $DATA_DIR detect-floorplan
|
||||
|
|
|
|||
|
|
@ -1,23 +1,22 @@
|
|||
import dataclasses
|
||||
import json
|
||||
import pathlib
|
||||
|
||||
from data_access import Listing
|
||||
from rec.query import QueryParameters
|
||||
from repositories.listing_repository import ListingRepository
|
||||
|
||||
|
||||
async def export_immoweb(
|
||||
ctx,
|
||||
repository: ListingRepository,
|
||||
output_file: str,
|
||||
query_parameters: QueryParameters | None = None,
|
||||
):
|
||||
data_dir = ctx.obj["data_dir"]
|
||||
output_file_path = pathlib.Path(output_file)
|
||||
output_file_path.touch(exist_ok=True)
|
||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
||||
# listing_paths = listing_paths[:10]
|
||||
listings = Listing.get_all_listings([str(path) for path in listing_paths])
|
||||
if query_parameters is not None:
|
||||
listings = await filter_listings(listings, query_parameters)
|
||||
listings = await repository.get_listings(
|
||||
query_parameters=query_parameters,
|
||||
)
|
||||
|
||||
# Convert listings to immoweb format
|
||||
immoweb_listings = []
|
||||
|
|
@ -27,18 +26,22 @@ async def export_immoweb(
|
|||
"properties": {
|
||||
"city": "London", # change me
|
||||
"country": "United Kingdom",
|
||||
"qm": await listing.sqm_ocr(),
|
||||
"qmprice": round(await listing.price_per_sqm(), 2),
|
||||
"rooms": listing.bedrooms,
|
||||
"qm": listing.square_meters,
|
||||
"qmprice": listing.price_per_square_meter,
|
||||
"rooms": listing.number_of_bedrooms,
|
||||
"total_price": listing.price,
|
||||
"url": listing.url,
|
||||
"photo_thumbnail": listing.photo_thumbnail,
|
||||
"last_seen": listing.last_seen.isoformat(),
|
||||
"price_history": [item.to_dict() for item in listing.price_history],
|
||||
"agency": listing.agency,
|
||||
# Additional info; the above is GeoJSON format
|
||||
# Below is all other crap we want in the UI
|
||||
"info": await listing.dict_nicely(),
|
||||
"info": listing.additional_info,
|
||||
},
|
||||
"geometry": {
|
||||
"coordinates": [
|
||||
listing.longitude,
|
||||
listing.longtitude,
|
||||
listing.latitude,
|
||||
],
|
||||
"type": "Point",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue