Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/

The crawler subdirectory was the only active project. Moving it to the
repo root simplifies paths and removes the unnecessary nesting. The
vqa/ and immoweb/ directories were legacy/unused and have been removed.

Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect
the new flat structure.
This commit is contained in:
Viktor Barzin 2026-02-07 23:01:20 +00:00
parent e2247be700
commit eafbc1ac52
No known key found for this signature in database
GPG key ID: 0EB088298288D958
221 changed files with 70 additions and 146140 deletions

103
services/listing_cache.py Normal file
View file

@ -0,0 +1,103 @@
"""Redis-based caching for listing GeoJSON query results."""
import hashlib
import json
import logging
import os
from typing import Generator
from urllib.parse import urlparse, urlunparse
import redis
from models.listing import QueryParameters
logger = logging.getLogger(__name__)
CACHE_PREFIX = "listings:geojson:"
CACHE_TTL_SECONDS = 30 * 60 # 30 minutes
CACHE_DB = 2
def _get_redis_client() -> redis.Redis:
"""Get Redis client using Celery broker URL but overriding to db=2."""
broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
parsed = urlparse(broker_url)
cache_url = urlunparse(parsed._replace(path=f"/{CACHE_DB}"))
return redis.from_url(cache_url, decode_responses=True)
def make_cache_key(query_params: QueryParameters) -> str:
"""Generate a cache key from query parameters."""
params_json = query_params.model_dump_json()
hash_suffix = hashlib.sha256(params_json.encode()).hexdigest()[:16]
return f"{CACHE_PREFIX}{hash_suffix}"
def get_cached_count(query_params: QueryParameters) -> int | None:
"""Return the number of cached features for a query, or None if not cached."""
try:
client = _get_redis_client()
key = make_cache_key(query_params)
if not client.exists(key):
return None
return client.llen(key)
except redis.RedisError as e:
logger.warning(f"Redis cache read error: {e}")
return None
def get_cached_features(
query_params: QueryParameters, batch_size: int = 50
) -> Generator[list[dict], None, None]:
"""Yield batches of cached GeoJSON features."""
try:
client = _get_redis_client()
key = make_cache_key(query_params)
total = client.llen(key)
for start in range(0, total, batch_size):
end = start + batch_size - 1
items = client.lrange(key, start, end)
batch = [json.loads(item) for item in items]
if batch:
yield batch
except redis.RedisError as e:
logger.warning(f"Redis cache read error during streaming: {e}")
def cache_features_batch(query_params: QueryParameters, features: list[dict]) -> None:
"""Append a batch of features to the cache list."""
if not features:
return
try:
client = _get_redis_client()
key = make_cache_key(query_params)
pipeline = client.pipeline()
for feature in features:
pipeline.rpush(key, json.dumps(feature))
# Set/refresh TTL
pipeline.expire(key, CACHE_TTL_SECONDS)
pipeline.execute()
except redis.RedisError as e:
logger.warning(f"Redis cache write error: {e}")
def invalidate_cache() -> None:
"""Delete all listing GeoJSON cache entries."""
try:
client = _get_redis_client()
cursor = 0
deleted = 0
while True:
cursor, keys = client.scan(cursor, match=f"{CACHE_PREFIX}*", count=100)
if keys:
pipeline = client.pipeline()
for key in keys:
pipeline.delete(key)
pipeline.execute()
deleted += len(keys)
if cursor == 0:
break
if deleted:
logger.info(f"Invalidated {deleted} listing cache entries")
except redis.RedisError as e:
logger.warning(f"Redis cache invalidation error: {e}")