Fix duplicate listings via staged Redis cache and frontend stream cancellation

Three-pronged fix for duplicate listings appearing in the UI:

1. Backend: Replace direct rpush cache writes with staged population
   (write to temp key, then atomic RENAME to live key). Skip cache
   writes entirely for POI-enriched requests. Clean staging keys on
   invalidation.

2. Frontend: Add AbortController to cancel in-flight streaming requests
   when loadListings is called again, preventing data mixing.

3. Frontend: Deduplicate features by URL during stream accumulation as
   a safety net against any remaining server-side duplicates.
This commit is contained in:
Viktor Barzin 2026-02-09 21:17:30 +00:00
parent 5b8aa98446
commit 73d19e29d5
No known key found for this signature in database
GPG key ID: 0EB088298288D958
5 changed files with 159 additions and 38 deletions

View file

@ -3,6 +3,7 @@ import hashlib
import json
import logging
import os
import uuid
from typing import Generator
from urllib.parse import urlparse, urlunparse
@ -13,7 +14,9 @@ from models.listing import QueryParameters
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Key prefix for cached listing GeoJSON entries.
CACHE_PREFIX = "listings:geojson:"
# Key prefix for staging keys. NOTE: this value begins with CACHE_PREFIX, so a
# f"{CACHE_PREFIX}*" match pattern also matches every staging key.
STAGING_PREFIX = "listings:geojson:staging:"
CACHE_TTL_SECONDS = 30 * 60 # 30 minutes
STAGING_TTL_SECONDS = 5 * 60 # 5 minutes safety net for orphaned staging keys
# presumably the Redis logical database index used for this cache - TODO confirm
CACHE_DB = 2
@ -81,22 +84,71 @@ def cache_features_batch(query_params: QueryParameters, features: list[dict]) ->
logger.warning(f"Redis cache write error: {e}")
def begin_cache_population(query_params: QueryParameters) -> str:
    """Start a staged cache population and hand back a fresh staging key.

    Nothing is written to Redis here; the returned value is only a key name
    made of STAGING_PREFIX plus a random UUID hex string. The key receives
    its TTL when cache_features_batch_staged pushes a batch to it, so no
    pre-creation is needed. ``query_params`` is accepted but does not
    influence the generated key.
    """
    unique_suffix = uuid.uuid4().hex
    return f"{STAGING_PREFIX}{unique_suffix}"
def cache_features_batch_staged(staging_key: str, features: list[dict]) -> None:
    """Append a batch of features to a staging key and refresh its TTL.

    Each feature is JSON-encoded and appended, in the order given, to the
    Redis list stored at ``staging_key``. The batch is sent as one variadic
    RPUSH rather than one RPUSH per feature, followed by an EXPIRE of
    STAGING_TTL_SECONDS so an abandoned staging key eventually disappears.

    An empty ``features`` list is a no-op. Redis errors are logged as
    warnings and not re-raised.
    """
    if not features:
        return
    try:
        pipeline = _get_redis_client().pipeline()
        # Serialize the whole batch first; nothing is sent until execute().
        payloads = [json.dumps(feature) for feature in features]
        # One RPUSH with many values appends them left-to-right, which yields
        # the same list order as pushing each value individually.
        pipeline.rpush(staging_key, *payloads)
        pipeline.expire(staging_key, STAGING_TTL_SECONDS)
        pipeline.execute()
    except redis.RedisError as e:
        logger.warning(f"Redis staged cache write error: {e}")
def finalize_cache_population(staging_key: str, query_params: QueryParameters) -> None:
    """Promote a populated staging key to the live cache key.

    The live key name is derived from ``query_params`` via make_cache_key.
    The staging key is renamed onto it, then the live key's TTL is set to
    CACHE_TTL_SECONDS with a separate EXPIRE call. Redis errors are logged
    as warnings and not re-raised.

    NOTE(review): if no batch was ever pushed, the staging key presumably
    does not exist and the rename would fail, ending up in the warning
    branch - confirm against callers.
    """
    try:
        redis_client = _get_redis_client()
        live_key = make_cache_key(query_params)
        # A single RENAME swaps the new list in, replacing any existing live key.
        redis_client.rename(staging_key, live_key)
        redis_client.expire(live_key, CACHE_TTL_SECONDS)
    except redis.RedisError as e:
        logger.warning(f"Redis cache finalize error: {e}")
    else:
        logger.debug(f"Finalized cache population for {live_key}")
def delete_staging_key(staging_key: str) -> None:
    """Remove a staging key that will not be finalized (error cleanup).

    Redis errors are logged as warnings and not re-raised.
    """
    try:
        _get_redis_client().delete(staging_key)
    except redis.RedisError as e:
        logger.warning(f"Redis staging key cleanup error: {e}")
def invalidate_cache() -> None:
"""Delete all listing GeoJSON cache entries."""
"""Delete all listing GeoJSON cache entries, including staging keys."""
try:
client = _get_redis_client()
cursor = 0
deleted = 0
while True:
cursor, keys = client.scan(cursor, match=f"{CACHE_PREFIX}*", count=100)
if keys:
pipeline = client.pipeline()
for key in keys:
pipeline.delete(key)
pipeline.execute()
deleted += len(keys)
if cursor == 0:
break
# Clean both live cache keys and staging keys
for pattern in [f"{CACHE_PREFIX}*", f"{STAGING_PREFIX}*"]:
cursor = 0
while True:
cursor, keys = client.scan(cursor, match=pattern, count=100)
if keys:
pipeline = client.pipeline()
for key in keys:
pipeline.delete(key)
pipeline.execute()
deleted += len(keys)
if cursor == 0:
break
if deleted:
logger.info(f"Invalidated {deleted} listing cache entries")
except redis.RedisError as e: