migrate immoweb exporter to use models
This commit is contained in:
parent
e317d2ec54
commit
3785d01009
5 changed files with 94 additions and 22 deletions
|
|
@ -4,7 +4,7 @@ from dataclasses import dataclass
|
||||||
import json
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Any, List, Dict
|
from typing import Any, List, Dict
|
||||||
from models.listing import ListingSite
|
from models.listing import ListingSite, PriceHistoryItem
|
||||||
from rec import floorplan, routing
|
from rec import floorplan, routing
|
||||||
import re
|
import re
|
||||||
import datetime
|
import datetime
|
||||||
|
|
@ -381,11 +381,19 @@ class Listing:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def priceHistory(self) -> list[dict[str, Any]]:
|
def priceHistory(self) -> list[PriceHistoryItem]:
|
||||||
if not self.path_price_history().exists():
|
if not self.path_price_history().exists():
|
||||||
return []
|
return []
|
||||||
with open(self.path_price_history(), "r") as f:
|
with open(self.path_price_history(), "r") as f:
|
||||||
return json.load(f)
|
data = json.load(f)
|
||||||
|
return [
|
||||||
|
PriceHistoryItem(
|
||||||
|
first_seen=datetime.datetime.fromisoformat(item["first_seen"]),
|
||||||
|
last_seen=datetime.datetime.fromisoformat(item["last_seen"]),
|
||||||
|
price=item["price"],
|
||||||
|
)
|
||||||
|
for item in data
|
||||||
|
]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def longtitude(self) -> float:
|
def longtitude(self) -> float:
|
||||||
|
|
|
||||||
|
|
@ -329,7 +329,6 @@ def export_immoweb(
|
||||||
last_seen_days: int,
|
last_seen_days: int,
|
||||||
min_sqm: int | None = None,
|
min_sqm: int | None = None,
|
||||||
):
|
):
|
||||||
# use model
|
|
||||||
query_parameters = QueryParameters(
|
query_parameters = QueryParameters(
|
||||||
listing_type=ListingType[type],
|
listing_type=ListingType[type],
|
||||||
district_names=set(district),
|
district_names=set(district),
|
||||||
|
|
@ -343,9 +342,10 @@ def export_immoweb(
|
||||||
min_sqm=min_sqm,
|
min_sqm=min_sqm,
|
||||||
)
|
)
|
||||||
click.echo(
|
click.echo(
|
||||||
f"Exporting data to {output_file} that matches the query parameters: {query_parameters}"
|
f"Exporting data to {output_file} for listings stored in {engine.url} that match the query parameters: {query_parameters}"
|
||||||
)
|
)
|
||||||
asyncio.run(export_immoweb_ui(ctx, output_file, query_parameters))
|
repository = ListingRepository(engine=engine)
|
||||||
|
asyncio.run(export_immoweb_ui(repository, output_file, query_parameters))
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
|
|
||||||
|
|
@ -10,12 +10,19 @@ from rec import routing
|
||||||
from sqlmodel import JSON, SQLModel, Field, String
|
from sqlmodel import JSON, SQLModel, Field, String
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass(frozen=True)
|
||||||
class PriceHistoryItem:
|
class PriceHistoryItem:
|
||||||
first_seen: datetime
|
first_seen: datetime
|
||||||
last_seen: datetime
|
last_seen: datetime
|
||||||
price: float
|
price: float
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, float | str]:
|
||||||
|
return {
|
||||||
|
"first_seen": self.first_seen.isoformat(),
|
||||||
|
"last_seen": self.last_seen.isoformat(),
|
||||||
|
"price": self.price,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class Route:
|
class Route:
|
||||||
|
|
@ -54,7 +61,8 @@ class Listing(SQLModel, table=False):
|
||||||
council_tax_band: str | None = Field(default=None, nullable=True)
|
council_tax_band: str | None = Field(default=None, nullable=True)
|
||||||
longtitude: float = Field(nullable=False)
|
longtitude: float = Field(nullable=False)
|
||||||
latitude: float = Field(nullable=False)
|
latitude: float = Field(nullable=False)
|
||||||
price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
|
# price_history: List[Dict[str, Any]] = Field(default_factory=list, sa_type=JSON)
|
||||||
|
price_history_json: str = Field(sa_type=String)
|
||||||
listing_site: ListingSite = Field(nullable=False)
|
listing_site: ListingSite = Field(nullable=False)
|
||||||
last_seen: datetime = Field(default_factory=datetime.now, nullable=False)
|
last_seen: datetime = Field(default_factory=datetime.now, nullable=False)
|
||||||
photo_thumbnail: str | None = Field(default=None, nullable=True)
|
photo_thumbnail: str | None = Field(default=None, nullable=True)
|
||||||
|
|
@ -72,6 +80,56 @@ class Listing(SQLModel, table=False):
|
||||||
def is_removed(self) -> bool:
|
def is_removed(self) -> bool:
|
||||||
return not self.additional_info["property"]["visible"]
|
return not self.additional_info["property"]["visible"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def price_per_square_meter(self) -> float | None:
|
||||||
|
"""
|
||||||
|
Returns the price per square meter.
|
||||||
|
"""
|
||||||
|
if self.square_meters is None or self.square_meters == 0:
|
||||||
|
return None
|
||||||
|
return round(self.price / self.square_meters, 2)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def url(self):
|
||||||
|
return f"https://www.rightmove.co.uk/properties/{self.id}"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def price_history(self) -> List[PriceHistoryItem]:
|
||||||
|
"""
|
||||||
|
Returns a list of PriceHistoryItem objects from the price_history_json.
|
||||||
|
"""
|
||||||
|
if not self.price_history_json:
|
||||||
|
return []
|
||||||
|
parsed: list = json.loads(str(self.price_history_json))
|
||||||
|
for item in parsed:
|
||||||
|
item["first_seen"] = datetime.fromisoformat(item["first_seen"])
|
||||||
|
item["last_seen"] = datetime.fromisoformat(item["last_seen"])
|
||||||
|
return [
|
||||||
|
PriceHistoryItem(
|
||||||
|
first_seen=item["first_seen"],
|
||||||
|
last_seen=item["last_seen"],
|
||||||
|
price=item["price"],
|
||||||
|
)
|
||||||
|
for item in parsed
|
||||||
|
]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def serialize_price_history(price_history: List[PriceHistoryItem]) -> str:
|
||||||
|
"""
|
||||||
|
Serializes the price history to a JSON string.
|
||||||
|
"""
|
||||||
|
serialized = json.dumps(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"first_seen": item.first_seen.isoformat(),
|
||||||
|
"last_seen": item.last_seen.isoformat(),
|
||||||
|
"price": item.price,
|
||||||
|
}
|
||||||
|
for item in price_history
|
||||||
|
]
|
||||||
|
)
|
||||||
|
return serialized
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def routing_info(self) -> dict[DestinationMode, List[Route]]:
|
def routing_info(self) -> dict[DestinationMode, List[Route]]:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,14 @@
|
||||||
|
|
||||||
set -euxo pipefail
|
set -euxo pipefail
|
||||||
|
|
||||||
DATA_DIR="data/rs"
|
DATA_DIR="data/rs/test"
|
||||||
|
|
||||||
LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished"
|
LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 4000 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished"
|
||||||
|
#LISTING_FILTER_OPTIONS="--min-price 2000 --max-price 2500 --min-bedrooms 2 --max-bedrooms 4 -t rent --available-from $(date +%Y-%m-%d) --last-seen-days 7 --furnish-types furnished --district Islington" # DEBUG: UNCOMMENT ME WHEN TESTING
|
||||||
|
|
||||||
|
|
||||||
|
poetry install
|
||||||
alembic upgrade head # init db
|
alembic upgrade head # init db
|
||||||
|
|
||||||
python main.py --data-dir $DATA_DIR dump-listings $LISTING_FILTER_OPTIONS
|
python main.py --data-dir $DATA_DIR dump-listings $LISTING_FILTER_OPTIONS
|
||||||
python main.py --data-dir $DATA_DIR dump-images
|
python main.py --data-dir $DATA_DIR dump-images
|
||||||
python main.py --data-dir $DATA_DIR detect-floorplan
|
python main.py --data-dir $DATA_DIR detect-floorplan
|
||||||
|
|
|
||||||
|
|
@ -1,23 +1,22 @@
|
||||||
|
import dataclasses
|
||||||
import json
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from data_access import Listing
|
from data_access import Listing
|
||||||
from rec.query import QueryParameters
|
from rec.query import QueryParameters
|
||||||
|
from repositories.listing_repository import ListingRepository
|
||||||
|
|
||||||
|
|
||||||
async def export_immoweb(
|
async def export_immoweb(
|
||||||
ctx,
|
repository: ListingRepository,
|
||||||
output_file: str,
|
output_file: str,
|
||||||
query_parameters: QueryParameters | None = None,
|
query_parameters: QueryParameters | None = None,
|
||||||
):
|
):
|
||||||
data_dir = ctx.obj["data_dir"]
|
|
||||||
output_file_path = pathlib.Path(output_file)
|
output_file_path = pathlib.Path(output_file)
|
||||||
output_file_path.touch(exist_ok=True)
|
output_file_path.touch(exist_ok=True)
|
||||||
listing_paths = sorted(list(pathlib.Path(data_dir).glob("*/listing.json")))
|
listings = await repository.get_listings(
|
||||||
# listing_paths = listing_paths[:10]
|
query_parameters=query_parameters,
|
||||||
listings = Listing.get_all_listings([str(path) for path in listing_paths])
|
)
|
||||||
if query_parameters is not None:
|
|
||||||
listings = await filter_listings(listings, query_parameters)
|
|
||||||
|
|
||||||
# Convert listings to immoweb format
|
# Convert listings to immoweb format
|
||||||
immoweb_listings = []
|
immoweb_listings = []
|
||||||
|
|
@ -27,18 +26,22 @@ async def export_immoweb(
|
||||||
"properties": {
|
"properties": {
|
||||||
"city": "London", # change me
|
"city": "London", # change me
|
||||||
"country": "United Kingdom",
|
"country": "United Kingdom",
|
||||||
"qm": await listing.sqm_ocr(),
|
"qm": listing.square_meters,
|
||||||
"qmprice": round(await listing.price_per_sqm(), 2),
|
"qmprice": listing.price_per_square_meter,
|
||||||
"rooms": listing.bedrooms,
|
"rooms": listing.number_of_bedrooms,
|
||||||
"total_price": listing.price,
|
"total_price": listing.price,
|
||||||
"url": listing.url,
|
"url": listing.url,
|
||||||
|
"photo_thumbnail": listing.photo_thumbnail,
|
||||||
|
"last_seen": listing.last_seen.isoformat(),
|
||||||
|
"price_history": [item.to_dict() for item in listing.price_history],
|
||||||
|
"agency": listing.agency,
|
||||||
# Additional info; the above is GeoJSON format
|
# Additional info; the above is GeoJSON format
|
||||||
# Below is all other crap we want in the UI
|
# Below is all other crap we want in the UI
|
||||||
"info": await listing.dict_nicely(),
|
"info": listing.additional_info,
|
||||||
},
|
},
|
||||||
"geometry": {
|
"geometry": {
|
||||||
"coordinates": [
|
"coordinates": [
|
||||||
listing.longitude,
|
listing.longtitude,
|
||||||
listing.latitude,
|
listing.latitude,
|
||||||
],
|
],
|
||||||
"type": "Point",
|
"type": "Point",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue