Fix duplicate listings via staged Redis cache and frontend stream cancellation

Three-pronged fix for duplicate listings appearing in the UI:

1. Backend: Replace direct rpush cache writes with staged population
   (write to a temp key, then atomically RENAME it to the live key). Skip
   cache writes entirely for POI-enriched requests. Clean up staging keys
   on invalidation, and delete orphaned staging keys when a stream fails
   before it is finalized.

2. Frontend: Add AbortController to cancel in-flight streaming requests
   when loadListings is called again, preventing data from a superseded
   request from being mixed into the new results.

3. Frontend: Deduplicate features by URL during stream accumulation as
   a safety net against any remaining server-side duplicates.
This commit is contained in:
Viktor Barzin 2026-02-09 21:17:30 +00:00
parent 5b8aa98446
commit 73d19e29d5
No known key found for this signature in database
GPG key ID: 0EB088298288D958
5 changed files with 159 additions and 38 deletions

View file

@ -8,6 +8,7 @@ from api.auth import get_current_user
from api.config import DEV_TIER_ORIGINS, PROD_TIER_ORIGINS, APP_ENV
from api.passkey_routes import passkey_router
from api.poi_routes import poi_router
from api.ws_routes import ws_router
from api.rate_limit_config import RateLimitConfig
from api.rate_limiter import RateLimitMiddleware
from api.audit_middleware import AuditLogMiddleware
@ -30,7 +31,10 @@ from services import listing_service, export_service, district_service, task_ser
from services.listing_cache import (
get_cached_count,
get_cached_features,
cache_features_batch,
begin_cache_population,
cache_features_batch_staged,
finalize_cache_population,
delete_staging_key,
)
from repositories.poi_repository import POIRepository
from repositories.user_repository import UserRepository
@ -94,6 +98,7 @@ app = FastAPI(
)
app.include_router(passkey_router)
app.include_router(poi_router)
app.include_router(ws_router)
app.mount("/metrics", metrics_app)
meter = get_meter(__name__)
request_counter = meter.create_counter(
@ -213,6 +218,7 @@ async def _stream_from_db(
batch_size: int,
limit: int | None,
poi_distances_lookup: dict[int, list[dict[str, str | int]]] | None = None,
skip_cache: bool = False,
) -> AsyncGenerator[str, None]:
"""Stream GeoJSON features from the database, populating the cache as we go."""
repository = ListingRepository(engine)
@ -227,28 +233,44 @@ async def _stream_from_db(
"cached": False,
}) + "\n"
count = 0
batch: list[dict] = []
for row in repository.stream_listings_optimized(
query_parameters, limit=limit, page_size=batch_size
):
feature = convert_row_to_geojson(row, query_parameters.listing_type.value)
# Inject POI distances if available
if poi_distances_lookup and row['id'] in poi_distances_lookup:
feature['properties']['poi_distances'] = poi_distances_lookup[row['id']]
batch.append(feature)
count += 1
staging_key: str | None = None
if not skip_cache:
staging_key = begin_cache_population(query_parameters)
if len(batch) >= batch_size:
cache_features_batch(query_parameters, batch)
try:
count = 0
batch: list[dict] = []
for row in repository.stream_listings_optimized(
query_parameters, limit=limit, page_size=batch_size
):
feature = convert_row_to_geojson(row, query_parameters.listing_type.value)
# Inject POI distances if available
if poi_distances_lookup and row['id'] in poi_distances_lookup:
feature['properties']['poi_distances'] = poi_distances_lookup[row['id']]
batch.append(feature)
count += 1
if len(batch) >= batch_size:
if staging_key:
cache_features_batch_staged(staging_key, batch)
yield json.dumps({"type": "batch", "features": batch}) + "\n"
batch = []
if batch:
if staging_key:
cache_features_batch_staged(staging_key, batch)
yield json.dumps({"type": "batch", "features": batch}) + "\n"
batch = []
if batch:
cache_features_batch(query_parameters, batch)
yield json.dumps({"type": "batch", "features": batch}) + "\n"
# Atomically promote staged data to live cache
if staging_key:
finalize_cache_population(staging_key, query_parameters)
staging_key = None # Mark as finalized
yield json.dumps({"type": "complete", "total": count}) + "\n"
yield json.dumps({"type": "complete", "total": count}) + "\n"
finally:
# Clean up orphaned staging key on failure
if staging_key:
delete_staging_key(staging_key)
@app.get("/api/listing_geojson/stream")
@ -304,7 +326,10 @@ async def stream_listing_geojson(
if cached_count is not None and cached_count > 0 and not include_poi_distances:
generator = _stream_from_cache(query_parameters, batch_size, limit)
else:
generator = _stream_from_db(query_parameters, batch_size, limit, poi_distances_lookup)
generator = _stream_from_db(
query_parameters, batch_size, limit, poi_distances_lookup,
skip_cache=include_poi_distances,
)
return StreamingResponse(
generator,