Remove 1000-result limit, add Redis caching and virtual scrolling

- Remove hard-coded limit=1000 default from listing_geojson and streaming
  endpoints, allowing all matching results to be returned
- Add a Redis caching service (db=2, 30-minute TTL) that caches query
  results as Redis Lists for fast repeat queries with reduced DB load
- Integrate the cache into the streaming endpoint: serve from cache on hit,
  populate the cache on miss while streaming from the DB (see the sketch
  after this list)
- Invalidate the cache after a scrape completes, on both success and the
  no-new-listings path (task sketch after the new module below)
- Replace ScrollArea with react-virtuoso in ListView for virtual scrolling,
  keeping only ~20-30 DOM nodes regardless of list size
- Handle the metadata streaming message so the progress indicator shows
  "0 / N" from the start
- Throttle frontend state updates with requestAnimationFrame to prevent
  UI jank from rapid re-renders during cached response streaming
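
A minimal sketch of the cache integration described above, assuming a
FastAPI streaming endpoint that emits NDJSON: the endpoint name, the
services.listing_cache import path, the shape of the metadata message, and
the stream_features_from_db helper are all assumptions; only
get_cached_count, get_cached_features, and cache_features_batch come from
the new module in the diff below.

import json

from fastapi.responses import StreamingResponse

from models.listing import QueryParameters
from services import listing_cache  # assumed import path for the new module


def stream_features_from_db(query_params: QueryParameters):
    """Hypothetical DB helper: yields batches of GeoJSON feature dicts."""
    ...


def stream_listings(query_params: QueryParameters) -> StreamingResponse:
    def generate():
        cached_total = listing_cache.get_cached_count(query_params)
        if cached_total is not None:
            # Cache hit: emit the total up front (assumed shape of the
            # metadata message behind the "0 / N" progress), then replay
            # the cached batches straight out of Redis.
            yield json.dumps({"type": "metadata", "total": cached_total}) + "\n"
            for batch in listing_cache.get_cached_features(query_params):
                for feature in batch:
                    yield json.dumps(feature) + "\n"
        else:
            # Cache miss: stream from the DB and append each batch to the
            # cache so the next identical query is a hit.
            for batch in stream_features_from_db(query_params):
                listing_cache.cache_features_batch(query_params, batch)
                for feature in batch:
                    yield json.dumps(feature) + "\n"

    return StreamingResponse(generate(), media_type="application/x-ndjson")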
Viktor Barzin 2026-02-06 20:34:50 +00:00 committed by Viktor Barzin
parent c4b11ccfe9
commit 5514fa6381
8 changed files with 695 additions and 78 deletions


@@ -0,0 +1,99 @@
"""Redis-based caching for listing GeoJSON query results."""
import hashlib
import json
import logging
import os
from typing import Generator
import redis
from models.listing import QueryParameters
logger = logging.getLogger("uvicorn.error")
CACHE_PREFIX = "listings:geojson:"
CACHE_TTL_SECONDS = 30 * 60 # 30 minutes
CACHE_DB = 2
def _get_redis_client() -> redis.Redis:
"""Get Redis client using Celery broker URL but overriding to db=2."""
broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
# Replace the db number in the URL
base_url = broker_url.rsplit("/", 1)[0]
return redis.from_url(f"{base_url}/{CACHE_DB}", decode_responses=True)
def make_cache_key(query_params: QueryParameters) -> str:
"""Generate a cache key from query parameters."""
params_json = query_params.model_dump_json()
hash_suffix = hashlib.sha256(params_json.encode()).hexdigest()[:16]
return f"{CACHE_PREFIX}{hash_suffix}"
def get_cached_count(query_params: QueryParameters) -> int | None:
"""Return the number of cached features for a query, or None if not cached."""
try:
client = _get_redis_client()
key = make_cache_key(query_params)
if not client.exists(key):
return None
return client.llen(key)
except redis.RedisError as e:
logger.warning(f"Redis cache read error: {e}")
return None
def get_cached_features(
query_params: QueryParameters, batch_size: int = 50
) -> Generator[list[dict], None, None]:
"""Yield batches of cached GeoJSON features."""
try:
client = _get_redis_client()
key = make_cache_key(query_params)
total = client.llen(key)
for start in range(0, total, batch_size):
end = start + batch_size - 1
items = client.lrange(key, start, end)
batch = [json.loads(item) for item in items]
if batch:
yield batch
except redis.RedisError as e:
logger.warning(f"Redis cache read error during streaming: {e}")
def cache_features_batch(query_params: QueryParameters, features: list[dict]) -> None:
"""Append a batch of features to the cache list."""
if not features:
return
try:
client = _get_redis_client()
key = make_cache_key(query_params)
pipeline = client.pipeline()
for feature in features:
pipeline.rpush(key, json.dumps(feature))
# Set/refresh TTL
pipeline.expire(key, CACHE_TTL_SECONDS)
pipeline.execute()
except redis.RedisError as e:
logger.warning(f"Redis cache write error: {e}")
def invalidate_cache() -> None:
"""Delete all listing GeoJSON cache entries."""
try:
client = _get_redis_client()
cursor = 0
deleted = 0
while True:
cursor, keys = client.scan(cursor, match=f"{CACHE_PREFIX}*", count=100)
if keys:
client.delete(*keys)
deleted += len(keys)
if cursor == 0:
break
if deleted:
logger.info(f"Invalidated {deleted} listing cache entries")
except redis.RedisError as e:
logger.warning(f"Redis cache invalidation error: {e}")