Refactor codebase following Clean Code principles and add 229 tests
- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher) - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens - Consolidate duplicate filter methods in listing_repository - Move hardcoded config to env vars with backward-compatible defaults - Simplify CLI decorator to auto-build QueryParameters - Add deprecation docstring to data_access.py - Test count: 158 → 387 (all passing)
This commit is contained in:
parent
7e05b3c971
commit
150342bb9e
48 changed files with 5029 additions and 990 deletions
|
|
@ -3,7 +3,7 @@ from datetime import datetime, timedelta
|
|||
import json
|
||||
import logging
|
||||
import logging.config
|
||||
from typing import Annotated, Optional
|
||||
from typing import Annotated, AsyncGenerator, Optional
|
||||
from api.auth import get_current_user
|
||||
from api.config import DEV_TIER_ORIGINS, PROD_TIER_ORIGINS
|
||||
from api.passkey_routes import passkey_router
|
||||
|
|
@ -32,6 +32,8 @@ from opentelemetry.metrics import get_meter
|
|||
load_dotenv()
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
DEFAULT_BATCH_SIZE = 50
|
||||
|
||||
|
||||
def get_query_parameters(
|
||||
listing_type: ListingType,
|
||||
|
|
@ -120,11 +122,79 @@ async def get_listing_geojson(
|
|||
return result.data
|
||||
|
||||
|
||||
|
||||
async def _stream_from_cache(
|
||||
query_parameters: QueryParameters,
|
||||
batch_size: int,
|
||||
limit: int | None,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
"""Stream GeoJSON features from the Redis cache (cache-hit path)."""
|
||||
cached_count = get_cached_count(query_parameters)
|
||||
effective_total = min(limit, cached_count) if limit and cached_count else cached_count
|
||||
|
||||
yield json.dumps({
|
||||
"type": "metadata",
|
||||
"batch_size": batch_size,
|
||||
"total_expected": effective_total,
|
||||
"cached": True,
|
||||
}) + "\n"
|
||||
|
||||
count = 0
|
||||
for feature_batch in get_cached_features(query_parameters, batch_size=batch_size):
|
||||
if limit and count + len(feature_batch) > limit:
|
||||
feature_batch = feature_batch[:limit - count]
|
||||
count += len(feature_batch)
|
||||
yield json.dumps({"type": "batch", "features": feature_batch}) + "\n"
|
||||
if limit and count >= limit:
|
||||
break
|
||||
|
||||
yield json.dumps({"type": "complete", "total": count}) + "\n"
|
||||
|
||||
|
||||
async def _stream_from_db(
|
||||
query_parameters: QueryParameters,
|
||||
batch_size: int,
|
||||
limit: int | None,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
"""Stream GeoJSON features from the database, populating the cache as we go."""
|
||||
repository = ListingRepository(engine)
|
||||
|
||||
total = repository.count_listings(query_parameters)
|
||||
effective_total = min(limit, total) if limit else total
|
||||
|
||||
yield json.dumps({
|
||||
"type": "metadata",
|
||||
"batch_size": batch_size,
|
||||
"total_expected": effective_total,
|
||||
"cached": False,
|
||||
}) + "\n"
|
||||
|
||||
count = 0
|
||||
batch: list[dict] = []
|
||||
for row in repository.stream_listings_optimized(
|
||||
query_parameters, limit=limit, page_size=batch_size
|
||||
):
|
||||
feature = convert_row_to_geojson(row, query_parameters.listing_type.value)
|
||||
batch.append(feature)
|
||||
count += 1
|
||||
|
||||
if len(batch) >= batch_size:
|
||||
cache_features_batch(query_parameters, batch)
|
||||
yield json.dumps({"type": "batch", "features": batch}) + "\n"
|
||||
batch = []
|
||||
|
||||
if batch:
|
||||
cache_features_batch(query_parameters, batch)
|
||||
yield json.dumps({"type": "batch", "features": batch}) + "\n"
|
||||
|
||||
yield json.dumps({"type": "complete", "total": count}) + "\n"
|
||||
|
||||
|
||||
@app.get("/api/listing_geojson/stream")
|
||||
async def stream_listing_geojson(
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)],
|
||||
batch_size: int = 50,
|
||||
batch_size: int = DEFAULT_BATCH_SIZE,
|
||||
limit: int | None = None,
|
||||
) -> StreamingResponse:
|
||||
"""Stream listings as NDJSON for progressive map loading.
|
||||
|
|
@ -134,71 +204,14 @@ async def stream_listing_geojson(
|
|||
- batch: Array of GeoJSON features
|
||||
- complete: Final message with total count
|
||||
"""
|
||||
async def generate():
|
||||
# Check cache first
|
||||
cached_count = get_cached_count(query_parameters)
|
||||
|
||||
if cached_count is not None and cached_count > 0:
|
||||
# Cache HIT
|
||||
effective_total = min(limit, cached_count) if limit else cached_count
|
||||
|
||||
yield json.dumps({
|
||||
"type": "metadata",
|
||||
"batch_size": batch_size,
|
||||
"total_expected": effective_total,
|
||||
"cached": True,
|
||||
}) + "\n"
|
||||
|
||||
count = 0
|
||||
for feature_batch in get_cached_features(query_parameters, batch_size=batch_size):
|
||||
if limit and count + len(feature_batch) > limit:
|
||||
feature_batch = feature_batch[:limit - count]
|
||||
count += len(feature_batch)
|
||||
yield json.dumps({"type": "batch", "features": feature_batch}) + "\n"
|
||||
if limit and count >= limit:
|
||||
break
|
||||
|
||||
yield json.dumps({"type": "complete", "total": count}) + "\n"
|
||||
else:
|
||||
# Cache MISS - query DB and populate cache
|
||||
repository = ListingRepository(engine)
|
||||
|
||||
# Phase 1: Fast count for progress estimation
|
||||
total = repository.count_listings(query_parameters)
|
||||
effective_total = min(limit, total) if limit else total
|
||||
|
||||
yield json.dumps({
|
||||
"type": "metadata",
|
||||
"batch_size": batch_size,
|
||||
"total_expected": effective_total,
|
||||
"cached": False,
|
||||
}) + "\n"
|
||||
|
||||
# Phase 2: Stream with column projection and keyset pagination
|
||||
count = 0
|
||||
batch = []
|
||||
for row in repository.stream_listings_optimized(
|
||||
query_parameters, limit=limit, page_size=batch_size
|
||||
):
|
||||
feature = convert_row_to_geojson(row, query_parameters.listing_type.value)
|
||||
batch.append(feature)
|
||||
count += 1
|
||||
|
||||
if len(batch) >= batch_size:
|
||||
cache_features_batch(query_parameters, batch)
|
||||
yield json.dumps({"type": "batch", "features": batch}) + "\n"
|
||||
batch = []
|
||||
|
||||
# Send remaining
|
||||
if batch:
|
||||
cache_features_batch(query_parameters, batch)
|
||||
yield json.dumps({"type": "batch", "features": batch}) + "\n"
|
||||
|
||||
# Final message
|
||||
yield json.dumps({"type": "complete", "total": count}) + "\n"
|
||||
cached_count = get_cached_count(query_parameters)
|
||||
if cached_count is not None and cached_count > 0:
|
||||
generator = _stream_from_cache(query_parameters, batch_size, limit)
|
||||
else:
|
||||
generator = _stream_from_db(query_parameters, batch_size, limit)
|
||||
|
||||
return StreamingResponse(
|
||||
generate(),
|
||||
generator,
|
||||
media_type="application/x-ndjson",
|
||||
headers={
|
||||
"Cache-Control": "no-cache",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue