diff --git a/crawler/api/app.py b/crawler/api/app.py
index c894b19..a96679e 100644
--- a/crawler/api/app.py
+++ b/crawler/api/app.py
@@ -146,7 +146,7 @@ async def stream_listing_geojson(
for row in repository.stream_listings_optimized(
query_parameters, limit=limit, page_size=batch_size
):
- feature = convert_row_to_geojson(row)
+ feature = convert_row_to_geojson(row, query_parameters.listing_type.value)
batch.append(feature)
count += 1
diff --git a/crawler/frontend/src/components/FilterPanel.tsx b/crawler/frontend/src/components/FilterPanel.tsx
index 16b39e6..dc9ab3a 100644
--- a/crawler/frontend/src/components/FilterPanel.tsx
+++ b/crawler/frontend/src/components/FilterPanel.tsx
@@ -101,6 +101,24 @@ export function FilterPanel({ onSubmit, isLoading, listingCount }: FilterPanelPr
},
});
+ // Watch listing_type to make filters type-aware
+ const watchedListingType = form.watch('listing_type');
+
+ // Update price defaults when listing type changes
+ useEffect(() => {
+ if (watchedListingType === ListingType.BUY) {
+ form.setValue('min_price', 300000);
+ form.setValue('max_price', 600000);
+ } else {
+ form.setValue('min_price', 2000);
+ form.setValue('max_price', 3000);
+ }
+ // Clear furnish types when switching to BUY
+ if (watchedListingType === ListingType.BUY) {
+ setSelectedFurnishTypes([]);
+ }
+ }, [watchedListingType, form]);
+
const handleFormSubmit = (action: 'fetch-data' | 'visualize') => {
return form.handleSubmit((values) => {
const params: ParameterValues = {
@@ -400,29 +418,31 @@ export function FilterPanel({ onSubmit, isLoading, listingCount }: FilterPanelPr
)}
/>
-
-
Furnishing
-
- {[
- { value: FurnishType.FURNISHED, label: 'Furnished' },
- { value: FurnishType.PART_FURNISHED, label: 'Part' },
- { value: FurnishType.UNFURNISHED, label: 'Unfurn.' },
- ].map((option) => (
-
- ))}
+ {watchedListingType === ListingType.RENT && (
+
+
Furnishing
+
+ {[
+ { value: FurnishType.FURNISHED, label: 'Furnished' },
+ { value: FurnishType.PART_FURNISHED, label: 'Part' },
+ { value: FurnishType.UNFURNISHED, label: 'Unfurn.' },
+ ].map((option) => (
+
+ ))}
+
-
+ )}
@@ -456,33 +476,32 @@ export function FilterPanel({ onSubmit, isLoading, listingCount }: FilterPanelPr
- {/* Availability */}
+ {/* Availability / Recency */}
- Availability
+ {watchedListingType === ListingType.RENT ? 'Availability' : 'Recency'}
-
(
-
- Available From
-
-
-
-
- Rental listings only
-
-
- )}
- />
+ {watchedListingType === ListingType.RENT && (
+ (
+
+ Available From
+
+
+
+
+ )}
+ />
+ )}
£{property.total_price.toLocaleString()}
- /mo
+ {property.listing_type !== 'BUY' && (
+ /mo
+ )}
{priceIndicator && (
@@ -119,7 +121,9 @@ export function PropertyCard({
£{property.total_price.toLocaleString()}
- /mo
+ {property.listing_type !== 'BUY' && (
+ /mo
+ )}
{priceIndicator && (
@@ -145,10 +149,18 @@ export function PropertyCard({
£{property.qmprice}/m²
-
-
- Available {property.available_from}
-
+ {property.listing_type !== 'BUY' && property.available_from && (
+
+
+ Available {property.available_from}
+
+ )}
+ {property.listing_type === 'BUY' && (
+
+
+ Seen {lastSeenDays}d ago
+
+ )}
{/* Agency and last seen */}
diff --git a/crawler/frontend/src/types/index.ts b/crawler/frontend/src/types/index.ts
new file mode 100644
index 0000000..f538af3
--- /dev/null
+++ b/crawler/frontend/src/types/index.ts
@@ -0,0 +1,71 @@
+// TypeScript types for the frontend application
+
+// GeoJSON types
+/**
+ * One entry of a listing's price history, as consumed by the frontend.
+ *
+ * NOTE(review): the Python row exporter in this change emits `first_seen`,
+ * `last_seen` and `price` for each entry and no `id` — confirm this shape
+ * against the actual API payload.
+ */
+export interface PropertyPriceHistory {
+  id: number;
+  price: number;
+  last_seen: string;
+}
+
+/**
+ * The `properties` payload of a listing feature.
+ *
+ * NOTE(review): the Python exporters in this change can emit `null` for
+ * `qm`, `qmprice`, `photo_thumbnail`, `agency` and `available_from` — confirm
+ * whether these fields should be typed as nullable.
+ */
+export interface PropertyProperties {
+  url: string;
+  city: string;
+  country: string;
+  // Floor area; presumably square metres — confirm with the backend.
+  qm: number;
+  // Price per unit of floor area.
+  qmprice: number;
+  total_price: number;
+  rooms: number;
+  agency: string;
+  available_from: string;
+  // Timestamp serialised as a string (the backend uses isoformat()).
+  last_seen: string;
+  photo_thumbnail: string;
+  price_history: PropertyPriceHistory[];
+  // Optional: payloads without this key remain valid.
+  listing_type?: 'RENT' | 'BUY';
+}
+
+/** A single GeoJSON Feature with Point geometry describing one listing. */
+export interface PropertyFeature {
+  type: 'Feature';
+  geometry: {
+    type: 'Point';
+    coordinates: [number, number]; // [longitude, latitude]
+  };
+  // Listing attributes attached to the point.
+  properties: PropertyProperties;
+}
+
+/** A GeoJSON FeatureCollection whose features are all listing features. */
+export interface GeoJSONFeatureCollection {
+  type: 'FeatureCollection';
+  features: PropertyFeature[];
+}
+
+// Task status types
+/**
+ * Lifecycle states reported for a background task. Each member's value is
+ * the same string as its name.
+ *
+ * NOTE(review): the names look like Celery task states — confirm against the
+ * backend task runner.
+ */
+export enum TaskStatus {
+  PENDING = 'PENDING',
+  STARTED = 'STARTED',
+  SUCCESS = 'SUCCESS',
+  FAILURE = 'FAILURE',
+  REVOKED = 'REVOKED',
+}
+
+/** Response body describing the current state of a background task. */
+export interface TaskStatusResponse {
+  status: TaskStatus;
+  // Still serialised: callers must JSON-parse this to obtain a TaskResult.
+  result: string; // JSON string containing { progress: number }
+}
+
+/**
+ * Decoded form of `TaskStatusResponse.result`.
+ *
+ * NOTE(review): the unit/range of `progress` (fraction vs. percent) is not
+ * visible here — confirm with the backend.
+ */
+export interface TaskResult {
+  progress: number;
+}
+
+/** Response returned when a listings refresh is requested. */
+export interface RefreshListingsResponse {
+  // Identifier of the started task; presumably used to poll its status.
+  task_id: string;
+}
+
+// API error type
+/**
+ * Error raised for failed API calls, carrying a numeric `statusCode`
+ * alongside the usual message.
+ */
+export class ApiError extends Error {
+  constructor(
+    message: string,
+    public statusCode: number
+  ) {
+    super(message);
+    // When compiled to an ES5 target, calling `super(...)` on the built-in
+    // Error yields a plain Error instance; restoring the prototype keeps
+    // `instanceof ApiError` (and subclasses) working on every target.
+    Object.setPrototypeOf(this, new.target.prototype);
+    this.name = 'ApiError';
+  }
+}
diff --git a/crawler/repositories/listing_repository.py b/crawler/repositories/listing_repository.py
index c2bcaf8..187998c 100644
--- a/crawler/repositories/listing_repository.py
+++ b/crawler/repositories/listing_repository.py
@@ -1,5 +1,6 @@
from datetime import datetime, timedelta
import logging
+from typing import Generator
from data_access import Listing
from models.listing import (
BuyListing,
@@ -9,13 +10,20 @@ from models.listing import (
QueryParameters,
RentListing,
)
-from sqlalchemy import Engine
+from sqlalchemy import Engine, func, select as sa_select
from sqlmodel import Session, select
from sqlmodel.sql.expression import SelectOfScalar
from tqdm import tqdm
logger = logging.getLogger("uvicorn.error")
+# Column names projected by the GeoJSON streaming query. The large
+# routing_info_json and additional_info columns are deliberately left out.
+# Names that the selected model does not define are skipped by the streaming
+# query (it filters with hasattr). 'id' must stay in the list because keyset
+# pagination reads row.id to find the next page.
+STREAMING_COLUMNS = [
+    'id', 'price', 'number_of_bedrooms', 'square_meters',
+    'longitude', 'latitude', 'photo_thumbnail', 'last_seen',
+    'agency', 'price_history_json', 'available_from'
+]
+
class ListingRepository:
engine: Engine
@@ -58,6 +66,147 @@ class ListingRepository:
logging.debug(f"Found {len(rows)} listings")
return rows
+    def stream_listings(
+        self,
+        query_parameters: QueryParameters | None = None,
+        limit: int | None = None,
+        chunk_size: int = 100,
+    ) -> Generator[modelListing, None, None]:
+        """Yield matching listings one at a time as full model instances.
+
+        The result is iterated with ``yield_per(chunk_size)`` so rows are
+        handed over in chunks instead of as one materialised list. The
+        session stays open for as long as the generator is being consumed.
+
+        Args:
+            query_parameters: Filtering parameters; ``None`` applies no
+                filters and queries rent listings.
+            limit: Maximum number of listings to yield. A falsy value
+                (``None`` or ``0``) means no cap.
+            chunk_size: Number of rows to fetch at a time from the database.
+
+        Yields:
+            Listing model instances of the table selected for the query.
+        """
+        # Use the same model-selection rule as the other query helpers so the
+        # "which table?" decision lives in exactly one place.
+        model = self._get_model_for_query(query_parameters)
+
+        query = self._add_where_from_query_parameters(
+            select(model), model, query_parameters
+        )
+        if limit:
+            query = query.limit(limit)
+
+        with Session(self.engine) as session:
+            for listing in session.exec(query).yield_per(chunk_size):
+                yield listing
+
+    def _get_model_for_query(
+        self, query_parameters: QueryParameters | None
+    ) -> type[RentListing] | type[BuyListing]:
+        """Pick the listing model class a query should run against.
+
+        BuyListing is chosen only when parameters are supplied and they ask
+        for BUY listings; missing parameters or any other listing type fall
+        back to RentListing.
+        """
+        if not query_parameters:
+            return RentListing
+        wants_buy = query_parameters.listing_type == ListingType.BUY
+        return BuyListing if wants_buy else RentListing
+
+    def count_listings(self, query_parameters: QueryParameters | None = None) -> int:
+        """Return the number of listings matching the given filters.
+
+        Issues a single ``COUNT(id)`` query with the same WHERE clauses the
+        streaming query uses; intended for progress estimation. A missing
+        scalar result is reported as 0.
+        """
+        target_model = self._get_model_for_query(query_parameters)
+        count_query = self._add_where_from_query_parameters_raw(
+            sa_select(func.count(target_model.id)), target_model, query_parameters
+        )
+
+        with Session(self.engine) as session:
+            matched = session.execute(count_query).scalar()
+        return matched or 0
+
+    def stream_listings_optimized(
+        self,
+        query_parameters: QueryParameters | None = None,
+        limit: int | None = None,
+        page_size: int = 100,
+    ) -> Generator[dict, None, None]:
+        """Stream listings as dicts using keyset pagination and column projection.
+
+        Each page is fetched with ``WHERE id > last_id ORDER BY id LIMIT n``
+        instead of OFFSET, and only the columns named in STREAMING_COLUMNS
+        (those the model actually defines) are selected, which leaves out
+        the large JSON blobs. A fresh session is opened per page, so no
+        session is held open while the consumer processes rows.
+
+        Args:
+            query_parameters: Filtering parameters; ``None`` applies no
+                filters and queries rent listings.
+            limit: Maximum number of listings to yield. A falsy value
+                (``None`` or ``0``) means no cap.
+            page_size: Number of rows to fetch per database round-trip.
+
+        Yields:
+            One dict per row, keyed by the projected column names.
+        """
+        model = self._get_model_for_query(query_parameters)
+
+        # Select only needed columns (excludes routing_info_json, additional_info)
+        columns = [
+            getattr(model, col) for col in STREAMING_COLUMNS if hasattr(model, col)
+        ]
+
+        # Projection, filters and ordering are identical for every page, so
+        # build them once. This also pins the last_seen cutoff (derived from
+        # datetime.now()) to a single instant for the whole stream.
+        base_query = self._add_where_from_query_parameters_raw(
+            sa_select(*columns), model, query_parameters
+        ).order_by(model.id)
+
+        last_id: int | None = None
+        total_yielded = 0
+
+        while True:
+            if limit and total_yielded >= limit:
+                break
+
+            # Keyset pagination: resume strictly after the last id seen.
+            query = base_query
+            if last_id is not None:
+                query = query.where(model.id > last_id)
+
+            # Never ask for more rows than the caller's remaining budget.
+            batch_limit = page_size
+            if limit:
+                batch_limit = min(page_size, limit - total_yielded)
+            query = query.limit(batch_limit)
+
+            with Session(self.engine) as session:
+                results = session.execute(query).fetchall()
+
+            if not results:
+                break
+
+            for row in results:
+                yield row._asdict()
+                last_id = row.id
+                total_yielded += 1
+
+            # A short page means the table (or the limit) is exhausted.
+            if len(results) < page_size:
+                break
+
+    def _add_where_from_query_parameters_raw(
+        self,
+        query,
+        model: type[RentListing] | type[BuyListing],
+        query_parameters: QueryParameters | None = None,
+    ):
+        """Apply the filter parameters to a plain SQLAlchemy select.
+
+        Bedroom and price ranges are always applied. Minimum area and the
+        last-seen window are applied when set. Furnish type and
+        availability date are applied only for RentListing. With no
+        parameters the query is returned untouched.
+        """
+        params = query_parameters
+        if params is None:
+            return query
+
+        bedrooms_in_range = model.number_of_bedrooms.between(
+            params.min_bedrooms, params.max_bedrooms
+        )
+        price_in_range = model.price.between(params.min_price, params.max_price)
+        filtered = query.where(bedrooms_in_range, price_in_range)
+
+        if params.min_sqm is not None:
+            filtered = filtered.where(model.square_meters >= params.min_sqm)
+
+        # The next two filters are restricted to the rent model.
+        is_rent_model = model == RentListing
+        if params.furnish_types and is_rent_model:
+            filtered = filtered.where(model.furnish_type.in_(params.furnish_types))
+        if is_rent_model and params.let_date_available_from is not None:
+            filtered = filtered.where(
+                model.available_from >= params.let_date_available_from
+            )
+
+        if params.last_seen_days is not None:
+            cutoff = datetime.now() - timedelta(days=params.last_seen_days)
+            filtered = filtered.where(model.last_seen >= cutoff)
+
+        return filtered
+
def _add_where_from_query_parameters(
self,
query: SelectOfScalar[Listing],
@@ -74,7 +223,7 @@ class ListingRepository:
)
if query_parameters.min_sqm is not None:
query = query.where(model.square_meters >= query_parameters.min_sqm)
- if query_parameters.furnish_types:
+ if query_parameters.furnish_types and model == RentListing:
query = query.where(model.furnish_type.in_(query_parameters.furnish_types))
if (
isinstance(model, RentListing)
diff --git a/crawler/ui_exporter.py b/crawler/ui_exporter.py
index 173306f..8636d9f 100644
--- a/crawler/ui_exporter.py
+++ b/crawler/ui_exporter.py
@@ -1,13 +1,122 @@
import json
import logging
import pathlib
+from typing import Any
-from models.listing import QueryParameters
+from models.listing import QueryParameters, RentListing, BuyListing
from repositories.listing_repository import ListingRepository
logger = logging.getLogger("uvicorn.error")
+def _isoformat_or_str(value: Any) -> str:
+    """Render *value* via ``isoformat()`` when available, otherwise ``str()``."""
+    if hasattr(value, 'isoformat'):
+        return value.isoformat()
+    return str(value)
+
+
+def convert_row_to_geojson(row: dict[str, Any], listing_type: str = "RENT") -> dict[str, Any]:
+    """Convert a projected row dict to GeoJSON Feature format.
+
+    This function handles dict rows from stream_listings_optimized(),
+    which uses column projection and returns dicts instead of model instances.
+
+    Args:
+        row: A dict with keys matching STREAMING_COLUMNS. ``id``, ``price``,
+            ``number_of_bedrooms``, ``longitude``, ``latitude`` and
+            ``last_seen`` are required; the remaining keys may be absent.
+        listing_type: Value written verbatim to the ``listing_type``
+            property of the feature (defaults to ``"RENT"``).
+
+    Returns:
+        A GeoJSON Feature dict with properties and geometry
+
+    Raises:
+        KeyError: If a required key is missing from ``row`` or a price
+            history entry lacks ``first_seen``, ``last_seen`` or ``price``.
+    """
+    # Price history normally arrives as a JSON string; an already-decoded
+    # list is accepted as well so callers are not forced to re-encode it.
+    raw_history = row.get('price_history_json')
+    price_history = []
+    if raw_history:
+        if isinstance(raw_history, (str, bytes, bytearray)):
+            parsed = json.loads(raw_history)
+        else:
+            parsed = raw_history
+        # Keep only the three fields the frontend consumes.
+        price_history = [
+            {
+                "first_seen": p["first_seen"],
+                "last_seen": p["last_seen"],
+                "price": p["price"]
+            }
+            for p in parsed
+        ]
+
+    sqm = row.get('square_meters')
+    price = row['price']
+
+    # available_from may be a date/datetime, a plain value, or missing/None;
+    # None is passed through unchanged rather than stringified.
+    available_from_val = row.get('available_from')
+    available_from_str = (
+        None if available_from_val is None else _isoformat_or_str(available_from_val)
+    )
+
+    return {
+        "type": "Feature",
+        "properties": {
+            "listing_type": listing_type,
+            "city": "London",
+            "country": "United Kingdom",
+            "qm": sqm,
+            # Guard against a missing or zero area to avoid dividing by zero.
+            "qmprice": round(price / sqm, 2) if sqm else None,
+            "rooms": row['number_of_bedrooms'],
+            "total_price": price,
+            "url": f"https://www.rightmove.co.uk/properties/{row['id']}",
+            "photo_thumbnail": row.get('photo_thumbnail'),
+            "last_seen": _isoformat_or_str(row['last_seen']),
+            "price_history": price_history,
+            "agency": row.get('agency'),
+            "available_from": available_from_str,
+        },
+        "geometry": {
+            "coordinates": [row['longitude'], row['latitude']],
+            "type": "Point",
+        },
+    }
+
+
+def convert_to_geojson_feature(listing: RentListing | BuyListing) -> dict[str, Any]:
+    """Build the GeoJSON Feature dict for one listing model instance.
+
+    Args:
+        listing: A RentListing or BuyListing model instance
+
+    Returns:
+        A dict with ``type``, ``properties`` and ``geometry`` keys. The
+        ``listing_type`` property is "RENT" for RentListing instances and
+        "BUY" otherwise.
+    """
+    # additional_info may be empty/None; fall back to an empty mapping so the
+    # letDateAvailable lookup below cannot fail on it.
+    extra = listing.additional_info
+    property_info = extra.get("property", {}) if extra else {}
+
+    properties = {
+        "listing_type": "RENT" if isinstance(listing, RentListing) else "BUY",
+        "city": "London",  # hard-coded placeholder, flagged "change me"
+        "country": "United Kingdom",
+        "qm": listing.square_meters,
+        "qmprice": listing.price_per_square_meter,
+        "rooms": listing.number_of_bedrooms,
+        "total_price": listing.price,
+        "url": listing.url,
+        "photo_thumbnail": listing.photo_thumbnail,
+        "last_seen": listing.last_seen.isoformat(),
+        "price_history": [entry.to_dict() for entry in listing.price_history],
+        "agency": listing.agency,
+        "available_from": property_info.get("letDateAvailable", None),
+    }
+    geometry = {
+        "coordinates": [listing.longitude, listing.latitude],
+        "type": "Point",
+    }
+    return {"type": "Feature", "properties": properties, "geometry": geometry}
+
+
async def export_immoweb(
repository: ListingRepository,
output_file: str | None = None,
@@ -20,39 +129,8 @@ async def export_immoweb(
)
logger.info(f"Fetched {len(listings)} listings")
- # Convert listings to immoweb format
- immoweb_listings = []
- for listing in listings:
- immoweb_listing = {
- "type": "Feature",
- "properties": {
- "city": "London", # change me
- "country": "United Kingdom",
- "qm": listing.square_meters,
- "qmprice": listing.price_per_square_meter,
- "rooms": listing.number_of_bedrooms,
- "total_price": listing.price,
- "url": listing.url,
- "photo_thumbnail": listing.photo_thumbnail,
- "last_seen": listing.last_seen.isoformat(),
- "price_history": [item.to_dict() for item in listing.price_history],
- "agency": listing.agency,
- "available_from": listing.additional_info["property"].get(
- "letDateAvailable", None
- ),
- # All other crap can be found in additional_info
- # Prefer pulling out fields here instead of exporting the entire additional_info
- # "info": listing.additional_info,
- },
- "geometry": {
- "coordinates": [
- listing.longitude,
- listing.latitude,
- ],
- "type": "Point",
- },
- }
- immoweb_listings.append(immoweb_listing)
+ # Convert listings to GeoJSON features using the helper function
+ immoweb_listings = [convert_to_geojson_feature(listing) for listing in listings]
prefix = "var data = "
serialized_data = {"type": "FeatureCollection", "features": immoweb_listings}