Fix buy listing support: thread ListingType through processing pipeline
The listing processor was hardcoded to create RentListing objects and query only the rentlisting table. Buy listings fetched from Rightmove were stored in the wrong table with missing fields. This threads ListingType through ListingProcessor and all Step subclasses so the correct model (RentListing/BuyListing) is created, the correct table is queried, and buy-specific fields (service_charge, lease_left) are parsed from the API response and included in GeoJSON streaming output.
This commit is contained in:
parent
5e48a26958
commit
e5ce8c1201
6 changed files with 116 additions and 41 deletions
|
|
@ -6,9 +6,19 @@ from datetime import datetime
|
|||
import logging
|
||||
import multiprocessing
|
||||
from pathlib import Path
|
||||
import re
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
import aiohttp
|
||||
from models.listing import FurnishType, Listing, ListingSite, RentListing
|
||||
from models.listing import (
|
||||
BuyListing,
|
||||
FurnishType,
|
||||
Listing,
|
||||
ListingSite,
|
||||
ListingType,
|
||||
QueryParameters,
|
||||
RentListing,
|
||||
)
|
||||
from rec import floorplan
|
||||
from rec.query import detail_query
|
||||
from repositories.listing_repository import ListingRepository
|
||||
|
|
@ -56,15 +66,16 @@ class ListingProcessor:
|
|||
"DetectFloorplanStep": "ocr",
|
||||
}
|
||||
|
||||
def __init__(self, listing_repository: ListingRepository):
|
||||
def __init__(self, listing_repository: ListingRepository, listing_type: ListingType = ListingType.RENT):
|
||||
self.semaphore = asyncio.Semaphore(20)
|
||||
self.listing_repository = listing_repository
|
||||
self.listing_type = listing_type
|
||||
# Register new processing steps here
|
||||
# Order is important
|
||||
self.process_steps = [
|
||||
FetchListingDetailsStep(listing_repository),
|
||||
FetchImagesStep(listing_repository),
|
||||
DetectFloorplanStep(listing_repository),
|
||||
FetchListingDetailsStep(listing_repository, listing_type),
|
||||
FetchImagesStep(listing_repository, listing_type),
|
||||
DetectFloorplanStep(listing_repository, listing_type),
|
||||
]
|
||||
|
||||
async def process_listing(
|
||||
|
|
@ -72,7 +83,7 @@ class ListingProcessor:
|
|||
listing_id: int,
|
||||
on_step_complete: Callable[[str], None] | None = None,
|
||||
) -> Listing | None:
|
||||
await self.listing_repository.mark_seen(listing_id)
|
||||
await self.listing_repository.mark_seen(listing_id, self.listing_type)
|
||||
listing = None
|
||||
for step in self.process_steps:
|
||||
if await step.needs_processing(listing_id):
|
||||
|
|
@ -94,9 +105,15 @@ class ListingProcessor:
|
|||
|
||||
class Step:
|
||||
listing_repository: ListingRepository
|
||||
listing_type: ListingType
|
||||
|
||||
def __init__(self, listing_repository: ListingRepository):
|
||||
def __init__(self, listing_repository: ListingRepository, listing_type: ListingType = ListingType.RENT):
|
||||
self.listing_repository = listing_repository
|
||||
self.listing_type = listing_type
|
||||
|
||||
def _query_params(self) -> QueryParameters:
|
||||
"""Build minimal QueryParameters for ID-based lookups in the correct table."""
|
||||
return QueryParameters(listing_type=self.listing_type)
|
||||
|
||||
@abstractmethod
|
||||
async def process(self, listing_id: int) -> Listing: ...
|
||||
|
|
@ -106,10 +123,36 @@ class Step:
|
|||
return True
|
||||
|
||||
|
||||
def _parse_service_charge(details: dict[str, Any]) -> float | None:
|
||||
"""Parse annual service charge from the tenure info in API response."""
|
||||
tenure_content = (
|
||||
details.get("property", {}).get("tenureInfo", {}).get("content", [])
|
||||
)
|
||||
for item in tenure_content:
|
||||
if item.get("type") == "annualServiceCharge":
|
||||
matches = re.findall(r"([\d,.]+)", str(item.get("value", "")))
|
||||
if matches:
|
||||
return float(matches[0].replace(",", ""))
|
||||
return None
|
||||
|
||||
|
||||
def _parse_lease_left(details: dict[str, Any]) -> int | None:
|
||||
"""Parse remaining lease years from the tenure info in API response."""
|
||||
tenure_content = (
|
||||
details.get("property", {}).get("tenureInfo", {}).get("content", [])
|
||||
)
|
||||
for item in tenure_content:
|
||||
if item.get("type") == "lengthOfLease":
|
||||
matches = re.findall(r"(\d+)", str(item.get("value", "")))
|
||||
if matches:
|
||||
return int(matches[0])
|
||||
return None
|
||||
|
||||
|
||||
class FetchListingDetailsStep(Step):
|
||||
async def needs_processing(self, listing_id: int) -> bool:
|
||||
existing_listings = await self.listing_repository.get_listings(
|
||||
only_ids=[listing_id]
|
||||
only_ids=[listing_id], query_parameters=self._query_params()
|
||||
)
|
||||
if len(existing_listings) == 0:
|
||||
return True
|
||||
|
|
@ -120,7 +163,7 @@ class FetchListingDetailsStep(Step):
|
|||
logger.info(f"[{listing_id}] Fetching details...")
|
||||
|
||||
existing_listings = await self.listing_repository.get_listings(
|
||||
only_ids=[listing_id]
|
||||
only_ids=[listing_id], query_parameters=self._query_params()
|
||||
)
|
||||
now = datetime.now()
|
||||
if len(existing_listings) > 0:
|
||||
|
|
@ -130,16 +173,8 @@ class FetchListingDetailsStep(Step):
|
|||
|
||||
listing_details = await detail_query(listing_id)
|
||||
|
||||
furnish_type = _parse_furnish_type(
|
||||
listing_details["property"].get("letFurnishType", "unknown")
|
||||
)
|
||||
|
||||
available_from = _parse_available_from(
|
||||
listing_details["property"]["letDateAvailable"]
|
||||
)
|
||||
|
||||
photos = listing_details["property"]["photos"]
|
||||
listing = RentListing( # TODO: should pick based on price?
|
||||
common_kwargs = dict(
|
||||
id=listing_id,
|
||||
price=listing_details["property"]["price"],
|
||||
number_of_bedrooms=listing_details["property"]["bedrooms"],
|
||||
|
|
@ -154,10 +189,29 @@ class FetchListingDetailsStep(Step):
|
|||
listing_site=ListingSite.RIGHTMOVE,
|
||||
last_seen=now,
|
||||
photo_thumbnail=photos[0]["thumbnailUrl"] if len(photos) > 0 else None,
|
||||
furnish_type=furnish_type,
|
||||
available_from=available_from,
|
||||
additional_info=listing_details,
|
||||
)
|
||||
|
||||
listing: Listing
|
||||
if self.listing_type == ListingType.BUY:
|
||||
listing = BuyListing(
|
||||
**common_kwargs,
|
||||
service_charge=_parse_service_charge(listing_details),
|
||||
lease_left=_parse_lease_left(listing_details),
|
||||
)
|
||||
else:
|
||||
furnish_type = _parse_furnish_type(
|
||||
listing_details["property"].get("letFurnishType", "unknown")
|
||||
)
|
||||
available_from = _parse_available_from(
|
||||
listing_details["property"]["letDateAvailable"]
|
||||
)
|
||||
listing = RentListing(
|
||||
**common_kwargs,
|
||||
furnish_type=furnish_type,
|
||||
available_from=available_from,
|
||||
)
|
||||
|
||||
await self.listing_repository.upsert_listings([listing])
|
||||
|
||||
logger.info(
|
||||
|
|
@ -172,7 +226,7 @@ class FetchListingDetailsStep(Step):
|
|||
class FetchImagesStep(Step):
|
||||
async def needs_processing(self, listing_id: int) -> bool:
|
||||
existing_listings = await self.listing_repository.get_listings(
|
||||
only_ids=[listing_id]
|
||||
only_ids=[listing_id], query_parameters=self._query_params()
|
||||
)
|
||||
if len(existing_listings) == 0:
|
||||
return False # if listing doesn't exist, we can't process it
|
||||
|
|
@ -183,7 +237,7 @@ class FetchImagesStep(Step):
|
|||
logger.debug(f"[{listing_id}] Fetching floorplan images")
|
||||
|
||||
existing_listings = await self.listing_repository.get_listings(
|
||||
only_ids=[listing_id]
|
||||
only_ids=[listing_id], query_parameters=self._query_params()
|
||||
)
|
||||
if len(existing_listings) == 0:
|
||||
raise ValueError(f"Listing {listing_id} not found")
|
||||
|
|
@ -228,12 +282,14 @@ class FetchImagesStep(Step):
|
|||
class DetectFloorplanStep(Step):
|
||||
ocr_semaphore: asyncio.Semaphore
|
||||
|
||||
def __init__(self, listing_repository: ListingRepository):
|
||||
super().__init__(listing_repository)
|
||||
def __init__(self, listing_repository: ListingRepository, listing_type: ListingType = ListingType.RENT):
|
||||
super().__init__(listing_repository, listing_type)
|
||||
self.ocr_semaphore = asyncio.Semaphore(MAX_OCR_WORKERS)
|
||||
|
||||
async def needs_processing(self, listing_id: int) -> bool:
|
||||
listings = await self.listing_repository.get_listings(only_ids=[listing_id])
|
||||
listings = await self.listing_repository.get_listings(
|
||||
only_ids=[listing_id], query_parameters=self._query_params()
|
||||
)
|
||||
if len(listings) == 0:
|
||||
return False
|
||||
return listings[0].square_meters is None
|
||||
|
|
@ -241,7 +297,9 @@ class DetectFloorplanStep(Step):
|
|||
async def process(self, listing_id: int) -> Listing:
|
||||
logger.debug(f"[{listing_id}] Running OCR on floorplans")
|
||||
|
||||
listings = await self.listing_repository.get_listings(only_ids=[listing_id])
|
||||
listings = await self.listing_repository.get_listings(
|
||||
only_ids=[listing_id], query_parameters=self._query_params()
|
||||
)
|
||||
if len(listings) == 0:
|
||||
raise ValueError(f"Listing {listing_id} does not exist")
|
||||
listing = listings[0]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue