From 5514fa638135f51e36f47b2e702a3b4cf669af5c Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 6 Feb 2026 20:34:50 +0000 Subject: [PATCH] Remove 1000-result limit, add Redis caching and virtual scrolling - Remove hard-coded limit=1000 default from listing_geojson and streaming endpoints, allowing all matching results to be returned - Add Redis caching service (db=2, 30min TTL) that caches query results as Redis Lists for fast re-queries with reduced DB load - Integrate cache into streaming endpoint: serve from cache on hit, populate cache on miss during DB streaming - Invalidate cache after scrape completes (both success and no-new-listings) - Replace ScrollArea with react-virtuoso in ListView for virtual scrolling, keeping only ~20-30 DOM nodes regardless of list size - Handle metadata streaming message to show "0 / N" progress from start - Throttle frontend state updates with requestAnimationFrame to prevent UI jank from rapid re-renders during cached response streaming --- crawler/api/app.py | 100 ++++--- crawler/frontend/package-lock.json | 252 +++++++++++++++--- crawler/frontend/package.json | 6 +- crawler/frontend/src/App.tsx | 24 +- crawler/frontend/src/components/ListView.tsx | 151 +++++++++++ .../frontend/src/services/streamingService.ts | 137 ++++++++++ crawler/services/listing_cache.py | 99 +++++++ crawler/tasks/listing_tasks.py | 4 + 8 files changed, 695 insertions(+), 78 deletions(-) create mode 100644 crawler/frontend/src/components/ListView.tsx create mode 100644 crawler/frontend/src/services/streamingService.ts create mode 100644 crawler/services/listing_cache.py diff --git a/crawler/api/app.py b/crawler/api/app.py index 96f8446..9dd3988 100644 --- a/crawler/api/app.py +++ b/crawler/api/app.py @@ -18,6 +18,11 @@ from fastapi.middleware.cors import CORSMiddleware from ui_exporter import convert_to_geojson_feature, convert_row_to_geojson from services import listing_service, export_service, district_service, task_service +from services.listing_cache import ( + get_cached_count, + get_cached_features, + cache_features_batch, +) from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from api.metrics import metrics_app from opentelemetry.metrics import get_meter @@ -101,7 +106,7 @@ async def get_listing( async def get_listing_geojson( user: Annotated[User, Depends(get_current_user)], query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)], - limit: int = 1000, # Default limit to prevent timeout + limit: int | None = None, ) -> dict: """Get listings as GeoJSON for map display.""" repository = ListingRepository(engine) @@ -118,7 +123,7 @@ async def stream_listing_geojson( user: Annotated[User, Depends(get_current_user)], query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)], batch_size: int = 50, - limit: int = 1000, + limit: int | None = None, ) -> StreamingResponse: """Stream listings as NDJSON for progressive map loading. @@ -128,38 +133,67 @@ async def stream_listing_geojson( - complete: Final message with total count """ async def generate(): - repository = ListingRepository(engine) + # Check cache first + cached_count = get_cached_count(query_parameters) - # Phase 1: Fast count for progress estimation - total = repository.count_listings(query_parameters) - effective_total = min(limit, total) if limit else total + if cached_count is not None and cached_count > 0: + # Cache HIT + effective_total = min(limit, cached_count) if limit else cached_count - yield json.dumps({ - "type": "metadata", - "batch_size": batch_size, - "total_expected": effective_total, - }) + "\n" + yield json.dumps({ + "type": "metadata", + "batch_size": batch_size, + "total_expected": effective_total, + "cached": True, + }) + "\n" - # Phase 2: Stream with column projection and keyset pagination - count = 0 - batch = [] - for row in repository.stream_listings_optimized( - query_parameters, limit=limit, page_size=batch_size - ): - feature = convert_row_to_geojson(row, query_parameters.listing_type.value) - batch.append(feature) - count += 1 + count = 0 + for feature_batch in get_cached_features(query_parameters, batch_size=batch_size): + if limit and count + len(feature_batch) > limit: + feature_batch = feature_batch[:limit - count] + count += len(feature_batch) + yield json.dumps({"type": "batch", "features": feature_batch}) + "\n" + if limit and count >= limit: + break - if len(batch) >= batch_size: + yield json.dumps({"type": "complete", "total": count}) + "\n" + else: + # Cache MISS - query DB and populate cache + repository = ListingRepository(engine) + + # Phase 1: Fast count for progress estimation + total = repository.count_listings(query_parameters) + effective_total = min(limit, total) if limit else total + + yield json.dumps({ + "type": "metadata", + "batch_size": batch_size, + "total_expected": effective_total, + "cached": False, + }) + "\n" + + # Phase 2: Stream with column projection and keyset pagination + count = 0 + batch = [] + for row in repository.stream_listings_optimized( + query_parameters, limit=limit, page_size=batch_size + ): + feature = convert_row_to_geojson(row, query_parameters.listing_type.value) + batch.append(feature) + count += 1 + + if len(batch) >= batch_size: + cache_features_batch(query_parameters, batch) + yield json.dumps({"type": "batch", "features": batch}) + "\n" + batch = [] + + # Send remaining + if batch: + cache_features_batch(query_parameters, batch) yield json.dumps({"type": "batch", "features": batch}) + "\n" - batch = [] - # Send remaining - if batch: - yield json.dumps({"type": "batch", "features": batch}) + "\n" - - # Final message - yield json.dumps({"type": "complete", "total": count}) + "\n" + # Final message + yield json.dumps({"type": "complete", "total": count}) + "\n" return StreamingResponse( generate(), @@ -200,13 +234,19 @@ async def refresh_listings( async def get_task_status( user: Annotated[User, Depends(get_current_user)], task_id: str, -) -> dict[str, str]: +) -> dict[str, str | int | float | None]: """Get the status of a background task.""" status = task_service.get_task_status(task_id) return { "task_id": status.task_id, "status": status.status, - "result": json.dumps(status.result) if status.result else "", + "result": json.dumps(status.result) if status.result else None, + "progress": status.progress, + "processed": status.processed, + "total": status.total, + "message": status.message, + "error": status.error, + "traceback": status.traceback, } diff --git a/crawler/frontend/package-lock.json b/crawler/frontend/package-lock.json index c3cbbde..07c0944 100644 --- a/crawler/frontend/package-lock.json +++ b/crawler/frontend/package-lock.json @@ -9,7 +9,9 @@ "version": "0.0.0", "dependencies": { "@hookform/resolvers": "^5.1.1", + "@radix-ui/react-accordion": "^1.2.12", "@radix-ui/react-alert-dialog": "^1.1.14", + "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-dialog": "^1.1.14", "@radix-ui/react-hover-card": "^1.1.14", "@radix-ui/react-label": "^2.1.7", @@ -18,6 +20,7 @@ "@radix-ui/react-scroll-area": "^1.2.9", "@radix-ui/react-select": "^2.2.5", "@radix-ui/react-separator": "^1.1.7", + "@radix-ui/react-slider": "^1.3.6", "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-tooltip": "^1.2.7", "@tabler/icons-react": "^3.34.0", @@ -40,7 +43,7 @@ "react-dom": "^19.1.0", "react-hook-form": "^7.58.1", "react-oidc-context": "^3.3.0", - "rivets": "^0.9.6", + "react-virtuoso": "^4.18.1", "tailwind-merge": "^3.3.1", "tailwindcss": "^4.1.10", "zod": "^3.25.67" @@ -50,7 +53,6 @@ "@types/node": "^24.0.1", "@types/react": "^19.1.2", "@types/react-dom": "^19.1.2", - "@types/rivets": "^0.9.5", "@vitejs/plugin-react-swc": "^3.9.0", "eslint": "^9.25.0", "eslint-plugin-react-hooks": "^5.2.0", @@ -924,6 +926,43 @@ "integrity": "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA==", "license": "MIT" }, + "node_modules/@radix-ui/react-accordion": { + "version": "1.2.12", + "resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.12.tgz", + "integrity": "sha512-T4nygeh9YE9dLRPhAHSeOZi7HBXo+0kYIPJXayZfvWOWA0+n3dESrZbjfDPUABkUNym6Hd+f2IR113To8D2GPA==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collapsible": "1.1.12", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/primitive": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", + "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", + "license": "MIT" + }, "node_modules/@radix-ui/react-alert-dialog": { "version": "1.1.14", "resolved": "https://registry.npmjs.org/@radix-ui/react-alert-dialog/-/react-alert-dialog-1.1.14.tgz", @@ -975,6 +1014,126 @@ } } }, + "node_modules/@radix-ui/react-checkbox": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-checkbox/-/react-checkbox-1.3.3.tgz", + "integrity": "sha512-wBbpv+NQftHDdG86Qc0pIyXk5IR3tM8Vd0nWLKDcX8nNn4nXFOFwsKuqw2okA/1D/mpaAkmuyndrPJTYDNZtFw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-previous": "1.1.1", + "@radix-ui/react-use-size": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-checkbox/node_modules/@radix-ui/primitive": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", + "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", + "license": "MIT" + }, + "node_modules/@radix-ui/react-checkbox/node_modules/@radix-ui/react-presence": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", + "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-collapsible": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz", + "integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/primitive": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", + "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", + "license": "MIT" + }, + "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-presence": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", + "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-collection": { "version": "1.1.7", "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz", @@ -1482,6 +1641,45 @@ } } }, + "node_modules/@radix-ui/react-slider": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slider/-/react-slider-1.3.6.tgz", + "integrity": "sha512-JPYb1GuM1bxfjMRlNLE+BcmBC8onfCi60Blk7OBqi2MLTFdS+8401U4uFjnwkOr49BLmXxLC6JHkvAsx5OJvHw==", + "license": "MIT", + "dependencies": { + "@radix-ui/number": "1.1.1", + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-previous": "1.1.1", + "@radix-ui/react-use-size": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-slider/node_modules/@radix-ui/primitive": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", + "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", + "license": "MIT" + }, "node_modules/@radix-ui/react-slot": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", @@ -2773,16 +2971,6 @@ "@types/geojson": "*" } }, - "node_modules/@types/jquery": { - "version": "3.5.32", - "resolved": "https://registry.npmjs.org/@types/jquery/-/jquery-3.5.32.tgz", - "integrity": "sha512-b9Xbf4CkMqS02YH8zACqN1xzdxc3cO735Qe5AbSUFmyOiaWAbcpqh9Wna+Uk0vgACvoQHpWDg2rGdHkYPLmCiQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/sizzle": "*" - } - }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -2852,23 +3040,6 @@ "@types/react": "^19.0.0" } }, - "node_modules/@types/rivets": { - "version": "0.9.5", - "resolved": "https://registry.npmjs.org/@types/rivets/-/rivets-0.9.5.tgz", - "integrity": "sha512-spCtZoSOrS8kNTJNOXamCCQurqOdF1Piak8bUQVqHQNRoTLoID6O6xVX41P5W2vvlxc9UpSG75zl4CRra0l3Eg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/jquery": "*" - } - }, - "node_modules/@types/sizzle": { - "version": "2.3.9", - "resolved": "https://registry.npmjs.org/@types/sizzle/-/sizzle-2.3.9.tgz", - "integrity": "sha512-xzLEyKB50yqCUPUJkIsrVvoWNfFUbIZI+RspLWt8u+tIW/BetMBZtgV2LY/2o+tYH8dRvQ+eoPf3NdhQCcLE2w==", - "dev": true, - "license": "MIT" - }, "node_modules/@types/supercluster": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz", @@ -5348,6 +5519,16 @@ } } }, + "node_modules/react-virtuoso": { + "version": "4.18.1", + "resolved": "https://registry.npmjs.org/react-virtuoso/-/react-virtuoso-4.18.1.tgz", + "integrity": "sha512-KF474cDwaSb9+SJ380xruBB4P+yGWcVkcu26HtMqYNMTYlYbrNy8vqMkE+GpAApPPufJqgOLMoWMFG/3pJMXUA==", + "license": "MIT", + "peerDependencies": { + "react": ">=16 || >=17 || >= 18 || >= 19", + "react-dom": ">=16 || >=17 || >= 18 || >=19" + } + }, "node_modules/resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", @@ -5378,14 +5559,6 @@ "node": ">=0.10.0" } }, - "node_modules/rivets": { - "version": "0.9.6", - "resolved": "https://registry.npmjs.org/rivets/-/rivets-0.9.6.tgz", - "integrity": "sha512-KfdMjLRWw4+38ej9bRXegKZVfYo0jEacwadA5z6NTKya+YohwGemwdbxvJ52WCXODkTnR4Q8UmUC6HVxsdzkxA==", - "dependencies": { - "sightglass": "~0.2.4" - } - }, "node_modules/robust-predicates": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz", @@ -5524,11 +5697,6 @@ "node": ">=8" } }, - "node_modules/sightglass": { - "version": "0.2.6", - "resolved": "https://registry.npmjs.org/sightglass/-/sightglass-0.2.6.tgz", - "integrity": "sha512-t1fgbuhURcWc8VgZk8kJQ3QmmZk3kghDcf0wpsN8I8RaV05IUkc2b195KpGqgocKT/q8+vKk6EcB2c7N2lAd6A==" - }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", diff --git a/crawler/frontend/package.json b/crawler/frontend/package.json index 466d325..a9c66a3 100644 --- a/crawler/frontend/package.json +++ b/crawler/frontend/package.json @@ -11,7 +11,9 @@ }, "dependencies": { "@hookform/resolvers": "^5.1.1", + "@radix-ui/react-accordion": "^1.2.12", "@radix-ui/react-alert-dialog": "^1.1.14", + "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-dialog": "^1.1.14", "@radix-ui/react-hover-card": "^1.1.14", "@radix-ui/react-label": "^2.1.7", @@ -20,6 +22,7 @@ "@radix-ui/react-scroll-area": "^1.2.9", "@radix-ui/react-select": "^2.2.5", "@radix-ui/react-separator": "^1.1.7", + "@radix-ui/react-slider": "^1.3.6", "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-tooltip": "^1.2.7", "@tabler/icons-react": "^3.34.0", @@ -42,7 +45,7 @@ "react-dom": "^19.1.0", "react-hook-form": "^7.58.1", "react-oidc-context": "^3.3.0", - "rivets": "^0.9.6", + "react-virtuoso": "^4.18.1", "tailwind-merge": "^3.3.1", "tailwindcss": "^4.1.10", "zod": "^3.25.67" @@ -52,7 +55,6 @@ "@types/node": "^24.0.1", "@types/react": "^19.1.2", "@types/react-dom": "^19.1.2", - "@types/rivets": "^0.9.5", "@vitejs/plugin-react-swc": "^3.9.0", "eslint": "^9.25.0", "eslint-plugin-react-hooks": "^5.2.0", diff --git a/crawler/frontend/src/App.tsx b/crawler/frontend/src/App.tsx index 5e90bdf..8933d56 100644 --- a/crawler/frontend/src/App.tsx +++ b/crawler/frontend/src/App.tsx @@ -67,16 +67,32 @@ function App() { setStreamingProgress({ count: 0 }); setListingData(null); + let updateScheduled = false; + + const flushUpdate = () => { + updateScheduled = false; + setListingData({ + type: 'FeatureCollection', + features: [...accumulatedFeaturesRef.current] + }); + }; + + const scheduleUpdate = () => { + if (!updateScheduled) { + updateScheduled = true; + requestAnimationFrame(flushUpdate); + } + }; + try { for await (const batch of streamListingGeoJSON(user, parameters, (progress) => { setStreamingProgress(progress); })) { accumulatedFeaturesRef.current.push(...batch); - setListingData({ - type: 'FeatureCollection', - features: [...accumulatedFeaturesRef.current] - }); + scheduleUpdate(); } + // Final flush to ensure all data is rendered + flushUpdate(); } catch (error) { if (error instanceof Error) { setSubmitError(error.message); diff --git a/crawler/frontend/src/components/ListView.tsx b/crawler/frontend/src/components/ListView.tsx new file mode 100644 index 0000000..790146a --- /dev/null +++ b/crawler/frontend/src/components/ListView.tsx @@ -0,0 +1,151 @@ +import { useState, useMemo, useCallback } from 'react'; +import { ArrowUpDown, ArrowUp, ArrowDown } from 'lucide-react'; +import { Virtuoso } from 'react-virtuoso'; +import { Button } from './ui/button'; +import { PropertyCard } from './PropertyCard'; +import type { GeoJSONFeatureCollection, PropertyFeature, PropertyProperties } from '@/types'; + +type SortField = 'total_price' | 'qmprice' | 'qm' | 'rooms' | 'last_seen'; +type SortOrder = 'asc' | 'desc'; + +interface ListViewProps { + listingData: GeoJSONFeatureCollection; + onPropertyClick?: (property: PropertyProperties, coordinates: [number, number]) => void; + highlightedPropertyUrl?: string | null; +} + +interface SortConfig { + field: SortField; + order: SortOrder; +} + +const SORT_OPTIONS: { field: SortField; label: string }[] = [ + { field: 'total_price', label: 'Price' }, + { field: 'qmprice', label: '£/m²' }, + { field: 'qm', label: 'Size' }, + { field: 'rooms', label: 'Beds' }, + { field: 'last_seen', label: 'Last Seen' }, +]; + +export function ListView({ listingData, onPropertyClick, highlightedPropertyUrl }: ListViewProps) { + const [sortConfig, setSortConfig] = useState({ field: 'qmprice', order: 'asc' }); + + // Calculate average price per sqm for "good deal" indicator + const avgPricePerSqm = useMemo(() => { + const validPrices = listingData.features + .map((f) => f.properties.qmprice) + .filter((p): p is number => typeof p === 'number' && p > 0); + return validPrices.length > 0 + ? validPrices.reduce((a, b) => a + b, 0) / validPrices.length + : 0; + }, [listingData]); + + // Sort features + const sortedFeatures = useMemo(() => { + const features = [...listingData.features]; + + features.sort((a, b) => { + let aValue: number | string; + let bValue: number | string; + + switch (sortConfig.field) { + case 'total_price': + aValue = a.properties.total_price || 0; + bValue = b.properties.total_price || 0; + break; + case 'qmprice': + aValue = a.properties.qmprice || 0; + bValue = b.properties.qmprice || 0; + break; + case 'qm': + aValue = a.properties.qm || 0; + bValue = b.properties.qm || 0; + break; + case 'rooms': + aValue = a.properties.rooms || 0; + bValue = b.properties.rooms || 0; + break; + case 'last_seen': + aValue = new Date(a.properties.last_seen).getTime(); + bValue = new Date(b.properties.last_seen).getTime(); + break; + default: + return 0; + } + + if (typeof aValue === 'number' && typeof bValue === 'number') { + return sortConfig.order === 'asc' ? aValue - bValue : bValue - aValue; + } + return 0; + }); + + return features; + }, [listingData.features, sortConfig]); + + const handleSort = (field: SortField) => { + setSortConfig((prev) => ({ + field, + order: prev.field === field && prev.order === 'asc' ? 'desc' : 'asc', + })); + }; + + const handlePropertyClick = useCallback((feature: PropertyFeature) => { + if (onPropertyClick) { + onPropertyClick(feature.properties, feature.geometry.coordinates); + } + }, [onPropertyClick]); + + const SortIcon = ({ field }: { field: SortField }) => { + if (sortConfig.field !== field) { + return ; + } + return sortConfig.order === 'asc' + ? + : ; + }; + + return ( +
+ {/* Sort controls */} +
+ Sort: + {SORT_OPTIONS.map((option) => ( + + ))} +
+ + {/* Listing count */} +
+ Showing {sortedFeatures.length.toLocaleString()} properties +
+ + {/* Property list */} + ( +
+ handlePropertyClick(feature)} + /> +
+ )} + /> +
+ ); +} diff --git a/crawler/frontend/src/services/streamingService.ts b/crawler/frontend/src/services/streamingService.ts new file mode 100644 index 0000000..1c1d69f --- /dev/null +++ b/crawler/frontend/src/services/streamingService.ts @@ -0,0 +1,137 @@ +// Streaming service for progressive listing data loading + +import type { User } from 'oidc-client-ts'; +import type { PropertyFeature } from '@/types'; +import type { ParameterValues } from '@/components/FilterPanel'; +import { ApiError } from '@/types'; +import { API_ENDPOINTS } from '@/constants'; + +/** + * Build query string from parameters object + */ +function buildQueryString(params: Record): string { + const queryString = new URLSearchParams(); + + for (const [key, value] of Object.entries(params)) { + if (value !== undefined && value !== null && value !== '') { + if (value instanceof Date) { + queryString.append(key, value.toISOString()); + } else { + queryString.append(key, String(value)); + } + } + } + + return queryString.toString(); +} + +/** + * Build listing query parameters from form values + */ +function buildListingParams(parameters: ParameterValues): Record { + return { + listing_type: parameters.listing_type, + min_bedrooms: parameters.min_bedrooms, + max_bedrooms: parameters.max_bedrooms, + max_price: parameters.max_price, + min_price: parameters.min_price, + min_sqm: parameters.min_sqm, + max_sqm: parameters.max_sqm, + min_price_per_sqm: parameters.min_price_per_sqm, + max_price_per_sqm: parameters.max_price_per_sqm, + last_seen_days: parameters.last_seen_days, + let_date_available_from: parameters.available_from, + district_names: parameters.district || undefined, + furnish_types: parameters.furnish_types?.join(',') || undefined, + }; +} + +export interface StreamMessage { + type: 'metadata' | 'batch' | 'complete'; + features?: PropertyFeature[]; + total?: number; + total_expected?: number; + batch_size?: number; + cached?: boolean; +} + +export interface StreamingProgress { + count: number; + total?: number; +} + +/** + * Stream listing GeoJSON data as an async generator. + * Yields batches of features as they arrive from the server. + */ +export async function* streamListingGeoJSON( + user: User, + parameters: ParameterValues, + onProgress?: (progress: StreamingProgress) => void +): AsyncGenerator { + const params = buildListingParams(parameters); + const queryString = buildQueryString(params); + const url = queryString + ? `${API_ENDPOINTS.LISTING_GEOJSON_STREAM}?${queryString}` + : API_ENDPOINTS.LISTING_GEOJSON_STREAM; + + const response = await fetch(url, { + headers: { + Authorization: `Bearer ${user.access_token}`, + }, + }); + + if (!response.ok) { + throw new ApiError(`Error: ${response.status}`, response.status); + } + + if (!response.body) { + throw new Error('No response body'); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + let totalCount = 0; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop() || ''; // Keep incomplete line in buffer + + for (const line of lines) { + if (!line.trim()) continue; + + try { + const message: StreamMessage = JSON.parse(line); + + if (message.type === 'metadata') { + onProgress?.({ count: 0, total: message.total_expected }); + } else if (message.type === 'batch' && message.features) { + totalCount += message.features.length; + onProgress?.({ count: totalCount }); + yield message.features; + } else if (message.type === 'complete') { + onProgress?.({ count: message.total ?? totalCount, total: message.total }); + } + } catch (e) { + console.error('Failed to parse streaming message:', e); + } + } + } + + // Process any remaining data in the buffer + if (buffer.trim()) { + try { + const message: StreamMessage = JSON.parse(buffer); + if (message.type === 'batch' && message.features) { + yield message.features; + } + } catch (e) { + console.error('Failed to parse final streaming message:', e); + } + } +} diff --git a/crawler/services/listing_cache.py b/crawler/services/listing_cache.py new file mode 100644 index 0000000..c77adfc --- /dev/null +++ b/crawler/services/listing_cache.py @@ -0,0 +1,99 @@ +"""Redis-based caching for listing GeoJSON query results.""" +import hashlib +import json +import logging +import os +from typing import Generator + +import redis + +from models.listing import QueryParameters + +logger = logging.getLogger("uvicorn.error") + +CACHE_PREFIX = "listings:geojson:" +CACHE_TTL_SECONDS = 30 * 60 # 30 minutes +CACHE_DB = 2 + + +def _get_redis_client() -> redis.Redis: + """Get Redis client using Celery broker URL but overriding to db=2.""" + broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") + # Replace the db number in the URL + base_url = broker_url.rsplit("/", 1)[0] + return redis.from_url(f"{base_url}/{CACHE_DB}", decode_responses=True) + + +def make_cache_key(query_params: QueryParameters) -> str: + """Generate a cache key from query parameters.""" + params_json = query_params.model_dump_json() + hash_suffix = hashlib.sha256(params_json.encode()).hexdigest()[:16] + return f"{CACHE_PREFIX}{hash_suffix}" + + +def get_cached_count(query_params: QueryParameters) -> int | None: + """Return the number of cached features for a query, or None if not cached.""" + try: + client = _get_redis_client() + key = make_cache_key(query_params) + if not client.exists(key): + return None + return client.llen(key) + except redis.RedisError as e: + logger.warning(f"Redis cache read error: {e}") + return None + + +def get_cached_features( + query_params: QueryParameters, batch_size: int = 50 +) -> Generator[list[dict], None, None]: + """Yield batches of cached GeoJSON features.""" + try: + client = _get_redis_client() + key = make_cache_key(query_params) + total = client.llen(key) + + for start in range(0, total, batch_size): + end = start + batch_size - 1 + items = client.lrange(key, start, end) + batch = [json.loads(item) for item in items] + if batch: + yield batch + except redis.RedisError as e: + logger.warning(f"Redis cache read error during streaming: {e}") + + +def cache_features_batch(query_params: QueryParameters, features: list[dict]) -> None: + """Append a batch of features to the cache list.""" + if not features: + return + try: + client = _get_redis_client() + key = make_cache_key(query_params) + pipeline = client.pipeline() + for feature in features: + pipeline.rpush(key, json.dumps(feature)) + # Set/refresh TTL + pipeline.expire(key, CACHE_TTL_SECONDS) + pipeline.execute() + except redis.RedisError as e: + logger.warning(f"Redis cache write error: {e}") + + +def invalidate_cache() -> None: + """Delete all listing GeoJSON cache entries.""" + try: + client = _get_redis_client() + cursor = 0 + deleted = 0 + while True: + cursor, keys = client.scan(cursor, match=f"{CACHE_PREFIX}*", count=100) + if keys: + client.delete(*keys) + deleted += len(keys) + if cursor == 0: + break + if deleted: + logger.info(f"Invalidated {deleted} listing cache entries") + except redis.RedisError as e: + logger.warning(f"Redis cache invalidation error: {e}") diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py index 713a56d..60bf2e6 100644 --- a/crawler/tasks/listing_tasks.py +++ b/crawler/tasks/listing_tasks.py @@ -16,6 +16,7 @@ from repositories.listing_repository import ListingRepository from database import engine from services.query_splitter import QuerySplitter, SubQuery from utils.redis_lock import redis_lock +from services.listing_cache import invalidate_cache logger = logging.getLogger("uvicorn.error") @@ -88,6 +89,7 @@ async def dump_listings_full( if len(ids_to_process) == 0: elapsed = time.time() - start_time celery_logger.info(f"No new listings found. Completed in {elapsed:.1f}s") + invalidate_cache() task.update_state( state="No new listings found", meta={"progress": 1, "processed": 0, "total": 0, "message": "All listings are up to date"}, @@ -111,6 +113,8 @@ async def dump_listings_full( celery_logger.info(f"COMPLETED: Processed {len(result)} listings in {elapsed:.1f}s") celery_logger.info("=" * 60) + invalidate_cache() + return result