Remove 1000-result limit, add Redis caching and virtual scrolling

- Remove hard-coded limit=1000 default from listing_geojson and streaming
  endpoints, allowing all matching results to be returned
- Add a Redis caching service (db=2, 30-minute TTL) that caches query
  results as Redis Lists for fast repeat queries with reduced DB load
- Integrate the cache into the streaming endpoint: serve from cache on hit,
  populate the cache on miss while streaming from the DB (see the sketch
  after this list)
- Invalidate the cache after a scrape completes, on both success and the
  no-new-listings path (task sketch after the new module below)
- Replace ScrollArea with react-virtuoso in ListView for virtual scrolling,
  keeping only ~20-30 DOM nodes regardless of list size
- Handle the metadata streaming message so the progress indicator shows
  "0 / N" from the start
- Throttle frontend state updates with requestAnimationFrame to prevent
  UI jank from rapid re-renders during cached response streaming
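
A minimal sketch of the cache integration described above, assuming a
FastAPI streaming endpoint that emits NDJSON: the endpoint name, the
services.listing_cache import path, the shape of the metadata message, and
the stream_features_from_db helper are all assumptions; only
get_cached_count, get_cached_features, and cache_features_batch come from
the new module in the diff below.

import json

from fastapi.responses import StreamingResponse

from models.listing import QueryParameters
from services import listing_cache  # assumed import path for the new module


def stream_features_from_db(query_params: QueryParameters):
    """Hypothetical DB helper: yields batches of GeoJSON feature dicts."""
    ...


def stream_listings(query_params: QueryParameters) -> StreamingResponse:
    def generate():
        cached_total = listing_cache.get_cached_count(query_params)
        if cached_total is not None:
            # Cache hit: emit the total up front (assumed shape of the
            # metadata message behind the "0 / N" progress), then replay
            # the cached batches straight out of Redis.
            yield json.dumps({"type": "metadata", "total": cached_total}) + "\n"
            for batch in listing_cache.get_cached_features(query_params):
                for feature in batch:
                    yield json.dumps(feature) + "\n"
        else:
            # Cache miss: stream from the DB and append each batch to the
            # cache so the next identical query is a hit.
            for batch in stream_features_from_db(query_params):
                listing_cache.cache_features_batch(query_params, batch)
                for feature in batch:
                    yield json.dumps(feature) + "\n"

    return StreamingResponse(generate(), media_type="application/x-ndjson")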
Viktor Barzin 2026-02-06 20:34:50 +00:00 committed by Viktor Barzin
parent c4b11ccfe9
commit 5514fa6381
8 changed files with 695 additions and 78 deletions


@@ -0,0 +1,99 @@
"""Redis-based caching for listing GeoJSON query results."""
import hashlib
import json
import logging
import os
from typing import Generator
import redis
from models.listing import QueryParameters
logger = logging.getLogger("uvicorn.error")
CACHE_PREFIX = "listings:geojson:"
CACHE_TTL_SECONDS = 30 * 60 # 30 minutes
CACHE_DB = 2
def _get_redis_client() -> redis.Redis:
"""Get Redis client using Celery broker URL but overriding to db=2."""
broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
# Replace the db number in the URL
base_url = broker_url.rsplit("/", 1)[0]
return redis.from_url(f"{base_url}/{CACHE_DB}", decode_responses=True)
def make_cache_key(query_params: QueryParameters) -> str:
"""Generate a cache key from query parameters."""
params_json = query_params.model_dump_json()
hash_suffix = hashlib.sha256(params_json.encode()).hexdigest()[:16]
return f"{CACHE_PREFIX}{hash_suffix}"
def get_cached_count(query_params: QueryParameters) -> int | None:
"""Return the number of cached features for a query, or None if not cached."""
try:
client = _get_redis_client()
key = make_cache_key(query_params)
if not client.exists(key):
return None
return client.llen(key)
except redis.RedisError as e:
logger.warning(f"Redis cache read error: {e}")
return None
def get_cached_features(
query_params: QueryParameters, batch_size: int = 50
) -> Generator[list[dict], None, None]:
"""Yield batches of cached GeoJSON features."""
try:
client = _get_redis_client()
key = make_cache_key(query_params)
total = client.llen(key)
for start in range(0, total, batch_size):
end = start + batch_size - 1
items = client.lrange(key, start, end)
batch = [json.loads(item) for item in items]
if batch:
yield batch
except redis.RedisError as e:
logger.warning(f"Redis cache read error during streaming: {e}")
def cache_features_batch(query_params: QueryParameters, features: list[dict]) -> None:
"""Append a batch of features to the cache list."""
if not features:
return
try:
client = _get_redis_client()
key = make_cache_key(query_params)
pipeline = client.pipeline()
for feature in features:
pipeline.rpush(key, json.dumps(feature))
# Set/refresh TTL
pipeline.expire(key, CACHE_TTL_SECONDS)
pipeline.execute()
except redis.RedisError as e:
logger.warning(f"Redis cache write error: {e}")
def invalidate_cache() -> None:
"""Delete all listing GeoJSON cache entries."""
try:
client = _get_redis_client()
cursor = 0
deleted = 0
while True:
cursor, keys = client.scan(cursor, match=f"{CACHE_PREFIX}*", count=100)
if keys:
client.delete(*keys)
deleted += len(keys)
if cursor == 0:
break
if deleted:
logger.info(f"Invalidated {deleted} listing cache entries")
except redis.RedisError as e:
logger.warning(f"Redis cache invalidation error: {e}")