Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/
The crawler subdirectory was the only active project. Moving it to the repo root simplifies paths and removes the unnecessary nesting. The vqa/ and immoweb/ directories were legacy/unused and have been removed. Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect the new flat structure.
This commit is contained in:
parent
e2247be700
commit
eafbc1ac52
221 changed files with 70 additions and 146140 deletions
103
services/listing_cache.py
Normal file
103
services/listing_cache.py
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
"""Redis-based caching for listing GeoJSON query results."""
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Generator
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
import redis
|
||||
|
||||
from models.listing import QueryParameters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CACHE_PREFIX = "listings:geojson:"
|
||||
CACHE_TTL_SECONDS = 30 * 60 # 30 minutes
|
||||
CACHE_DB = 2
|
||||
|
||||
|
||||
def _get_redis_client() -> redis.Redis:
|
||||
"""Get Redis client using Celery broker URL but overriding to db=2."""
|
||||
broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
|
||||
parsed = urlparse(broker_url)
|
||||
cache_url = urlunparse(parsed._replace(path=f"/{CACHE_DB}"))
|
||||
return redis.from_url(cache_url, decode_responses=True)
|
||||
|
||||
|
||||
def make_cache_key(query_params: QueryParameters) -> str:
|
||||
"""Generate a cache key from query parameters."""
|
||||
params_json = query_params.model_dump_json()
|
||||
hash_suffix = hashlib.sha256(params_json.encode()).hexdigest()[:16]
|
||||
return f"{CACHE_PREFIX}{hash_suffix}"
|
||||
|
||||
|
||||
def get_cached_count(query_params: QueryParameters) -> int | None:
|
||||
"""Return the number of cached features for a query, or None if not cached."""
|
||||
try:
|
||||
client = _get_redis_client()
|
||||
key = make_cache_key(query_params)
|
||||
if not client.exists(key):
|
||||
return None
|
||||
return client.llen(key)
|
||||
except redis.RedisError as e:
|
||||
logger.warning(f"Redis cache read error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_cached_features(
|
||||
query_params: QueryParameters, batch_size: int = 50
|
||||
) -> Generator[list[dict], None, None]:
|
||||
"""Yield batches of cached GeoJSON features."""
|
||||
try:
|
||||
client = _get_redis_client()
|
||||
key = make_cache_key(query_params)
|
||||
total = client.llen(key)
|
||||
|
||||
for start in range(0, total, batch_size):
|
||||
end = start + batch_size - 1
|
||||
items = client.lrange(key, start, end)
|
||||
batch = [json.loads(item) for item in items]
|
||||
if batch:
|
||||
yield batch
|
||||
except redis.RedisError as e:
|
||||
logger.warning(f"Redis cache read error during streaming: {e}")
|
||||
|
||||
|
||||
def cache_features_batch(query_params: QueryParameters, features: list[dict]) -> None:
|
||||
"""Append a batch of features to the cache list."""
|
||||
if not features:
|
||||
return
|
||||
try:
|
||||
client = _get_redis_client()
|
||||
key = make_cache_key(query_params)
|
||||
pipeline = client.pipeline()
|
||||
for feature in features:
|
||||
pipeline.rpush(key, json.dumps(feature))
|
||||
# Set/refresh TTL
|
||||
pipeline.expire(key, CACHE_TTL_SECONDS)
|
||||
pipeline.execute()
|
||||
except redis.RedisError as e:
|
||||
logger.warning(f"Redis cache write error: {e}")
|
||||
|
||||
|
||||
def invalidate_cache() -> None:
|
||||
"""Delete all listing GeoJSON cache entries."""
|
||||
try:
|
||||
client = _get_redis_client()
|
||||
cursor = 0
|
||||
deleted = 0
|
||||
while True:
|
||||
cursor, keys = client.scan(cursor, match=f"{CACHE_PREFIX}*", count=100)
|
||||
if keys:
|
||||
pipeline = client.pipeline()
|
||||
for key in keys:
|
||||
pipeline.delete(key)
|
||||
pipeline.execute()
|
||||
deleted += len(keys)
|
||||
if cursor == 0:
|
||||
break
|
||||
if deleted:
|
||||
logger.info(f"Invalidated {deleted} listing cache entries")
|
||||
except redis.RedisError as e:
|
||||
logger.warning(f"Redis cache invalidation error: {e}")
|
||||
Loading…
Add table
Add a link
Reference in a new issue