wrongmove/crawler/services/listing_cache.py
Viktor Barzin 150342bb9e
Refactor codebase following Clean Code principles and add 229 tests
- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
2026-02-07 20:19:57 +00:00

103 lines
3.3 KiB
Python

"""Redis-based caching for listing GeoJSON query results."""
import hashlib
import json
import logging
import os
from typing import Generator
from urllib.parse import urlparse, urlunparse
import redis
from models.listing import QueryParameters
logger = logging.getLogger(__name__)
CACHE_PREFIX = "listings:geojson:"
CACHE_TTL_SECONDS = 30 * 60 # 30 minutes
CACHE_DB = 2
def _get_redis_client() -> redis.Redis:
"""Get Redis client using Celery broker URL but overriding to db=2."""
broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
parsed = urlparse(broker_url)
cache_url = urlunparse(parsed._replace(path=f"/{CACHE_DB}"))
return redis.from_url(cache_url, decode_responses=True)
def make_cache_key(query_params: QueryParameters) -> str:
"""Generate a cache key from query parameters."""
params_json = query_params.model_dump_json()
hash_suffix = hashlib.sha256(params_json.encode()).hexdigest()[:16]
return f"{CACHE_PREFIX}{hash_suffix}"
def get_cached_count(query_params: QueryParameters) -> int | None:
"""Return the number of cached features for a query, or None if not cached."""
try:
client = _get_redis_client()
key = make_cache_key(query_params)
if not client.exists(key):
return None
return client.llen(key)
except redis.RedisError as e:
logger.warning(f"Redis cache read error: {e}")
return None
def get_cached_features(
query_params: QueryParameters, batch_size: int = 50
) -> Generator[list[dict], None, None]:
"""Yield batches of cached GeoJSON features."""
try:
client = _get_redis_client()
key = make_cache_key(query_params)
total = client.llen(key)
for start in range(0, total, batch_size):
end = start + batch_size - 1
items = client.lrange(key, start, end)
batch = [json.loads(item) for item in items]
if batch:
yield batch
except redis.RedisError as e:
logger.warning(f"Redis cache read error during streaming: {e}")
def cache_features_batch(query_params: QueryParameters, features: list[dict]) -> None:
"""Append a batch of features to the cache list."""
if not features:
return
try:
client = _get_redis_client()
key = make_cache_key(query_params)
pipeline = client.pipeline()
for feature in features:
pipeline.rpush(key, json.dumps(feature))
# Set/refresh TTL
pipeline.expire(key, CACHE_TTL_SECONDS)
pipeline.execute()
except redis.RedisError as e:
logger.warning(f"Redis cache write error: {e}")
def invalidate_cache() -> None:
"""Delete all listing GeoJSON cache entries."""
try:
client = _get_redis_client()
cursor = 0
deleted = 0
while True:
cursor, keys = client.scan(cursor, match=f"{CACHE_PREFIX}*", count=100)
if keys:
pipeline = client.pipeline()
for key in keys:
pipeline.delete(key)
pipeline.execute()
deleted += len(keys)
if cursor == 0:
break
if deleted:
logger.info(f"Invalidated {deleted} listing cache entries")
except redis.RedisError as e:
logger.warning(f"Redis cache invalidation error: {e}")