Refactor codebase following Clean Code principles and add 229 tests

- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
This commit is contained in:
Viktor Barzin 2026-02-07 20:19:57 +00:00
parent 7e05b3c971
commit 150342bb9e
No known key found for this signature in database
GPG key ID: 0EB088298288D958
48 changed files with 5029 additions and 990 deletions

View file

@ -1,5 +1,6 @@
"""Integration tests for API endpoints."""
from unittest.mock import AsyncMock, patch
import json
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from httpx import AsyncClient
@ -75,10 +76,12 @@ class TestListingGeoJsonEndpoint:
self, async_client: AsyncClient
) -> None:
"""Test that listing_geojson accepts filter parameters."""
mock_result = MagicMock()
mock_result.data = {"type": "FeatureCollection", "features": []}
with patch(
"api.app.export_immoweb",
"api.app.export_service.export_to_geojson",
new_callable=AsyncMock,
return_value={"type": "FeatureCollection", "features": []},
return_value=mock_result,
):
response = await async_client.get(
"/api/listing_geojson",
@ -178,3 +181,135 @@ class TestTaskStatusEndpoint:
)
# Should return 401 or 403 without valid auth
assert response.status_code in (401, 403)
class TestStreamListingGeoJsonEndpoint:
    """Tests for the /api/listing_geojson/stream endpoint."""

    @staticmethod
    def _ndjson_lines(body):
        """Return the non-empty lines of an NDJSON response body."""
        return [chunk for chunk in body.strip().split("\n") if chunk]

    @staticmethod
    def _point_feature(feature_id, lon, lat):
        """Build a minimal GeoJSON Point feature dict used as fake cached data."""
        return {
            "type": "Feature",
            "properties": {"id": feature_id},
            "geometry": {"type": "Point", "coordinates": [lon, lat]},
        }

    async def test_stream_returns_ndjson_with_metadata(
        self, async_client: AsyncClient
    ) -> None:
        """The stream is NDJSON: a metadata message first, a complete message last."""
        cached_batch = [
            self._point_feature(1, 0, 0),
            self._point_feature(2, 1, 1),
        ]

        # Both cache helpers are patched so the request is served from the
        # fake batch; get_cached_features yields that single batch once.
        with patch("api.app.get_cached_count", return_value=2):
            with patch(
                "api.app.get_cached_features",
                return_value=iter([cached_batch]),
            ):
                response = await async_client.get(
                    "/api/listing_geojson/stream",
                    params={"listing_type": "RENT", "batch_size": 50},
                )

        assert response.status_code == 200
        assert response.headers["content-type"] == "application/x-ndjson"

        messages = self._ndjson_lines(response.text)
        assert len(messages) >= 2  # at least metadata + complete

        first = json.loads(messages[0])
        assert first["type"] == "metadata"
        assert "batch_size" in first
        assert "total_expected" in first

        last = json.loads(messages[-1])
        assert last["type"] == "complete"
        assert "total" in last

    async def test_stream_cache_hit_path(
        self, async_client: AsyncClient
    ) -> None:
        """When a cached count is available, metadata reports cached: True."""
        cached_batch = [self._point_feature(1, 0, 0)]

        with patch("api.app.get_cached_count", return_value=1):
            with patch(
                "api.app.get_cached_features",
                return_value=iter([cached_batch]),
            ):
                response = await async_client.get(
                    "/api/listing_geojson/stream",
                    params={"listing_type": "RENT"},
                )

        assert response.status_code == 200

        messages = self._ndjson_lines(response.text)

        first = json.loads(messages[0])
        assert first["cached"] is True
        assert first["total_expected"] == 1

        batch = json.loads(messages[1])
        assert batch["type"] == "batch"
        assert len(batch["features"]) == 1

    async def test_stream_cache_miss_path(
        self, async_client: AsyncClient
    ) -> None:
        """When no cached count exists, rows come from the repository and cached is False."""
        import datetime as dt

        db_row = {
            "id": 100,
            "price": 2000.0,
            "number_of_bedrooms": 2,
            "square_meters": 50.0,
            "longitude": -0.1,
            "latitude": 51.5,
            "photo_thumbnail": None,
            "last_seen": dt.datetime(2024, 1, 1),
            "agency": "Test Agency",
            "price_history_json": "[]",
            "available_from": None,
        }

        # Stand-in repository: reports one listing and streams the single row.
        repo_double = MagicMock()
        repo_double.count_listings.return_value = 1
        repo_double.stream_listings_optimized.return_value = iter([db_row])

        # get_cached_count -> None forces the non-cached branch;
        # cache_features_batch is patched out so nothing is written to a cache.
        with patch("api.app.get_cached_count", return_value=None):
            with patch("api.app.ListingRepository", return_value=repo_double):
                with patch("api.app.cache_features_batch"):
                    response = await async_client.get(
                        "/api/listing_geojson/stream",
                        params={"listing_type": "RENT"},
                    )

        assert response.status_code == 200

        messages = self._ndjson_lines(response.text)

        first = json.loads(messages[0])
        assert first["cached"] is False
        assert first["total_expected"] == 1

        batch = json.loads(messages[1])
        assert batch["type"] == "batch"
        assert len(batch["features"]) == 1
        only_feature = batch["features"][0]
        assert only_feature["type"] == "Feature"
        assert only_feature["properties"]["total_price"] == 2000.0

        last = json.loads(messages[-1])
        assert last["type"] == "complete"
        assert last["total"] == 1

    async def test_stream_with_limit(
        self, async_client: AsyncClient
    ) -> None:
        """The limit parameter caps the number of streamed features."""
        # Five cached features are offered, but only three are requested.
        cached_batch = [self._point_feature(idx, 0, 0) for idx in range(5)]

        with patch("api.app.get_cached_count", return_value=5):
            with patch(
                "api.app.get_cached_features",
                return_value=iter([cached_batch]),
            ):
                response = await async_client.get(
                    "/api/listing_geojson/stream",
                    params={"listing_type": "RENT", "limit": 3},
                )

        assert response.status_code == 200

        messages = self._ndjson_lines(response.text)

        first = json.loads(messages[0])
        assert first["total_expected"] == 3

        last = json.loads(messages[-1])
        assert last["type"] == "complete"
        assert last["total"] == 3

View file

@ -77,7 +77,7 @@ class TestThrottlingRetryBehavior:
"""Test that 429 responses trigger retry with backoff."""
call_count = 0
async def mock_get(*args: object, **kwargs: object) -> MockResponse:
def mock_get(*args: object, **kwargs: object) -> MockResponse:
nonlocal call_count
call_count += 1
if call_count < 3:
@ -117,7 +117,7 @@ class TestThrottlingRetryBehavior:
"""Test that 503 responses trigger retry."""
call_count = 0
async def mock_get(*args: object, **kwargs: object) -> MockResponse:
def mock_get(*args: object, **kwargs: object) -> MockResponse:
nonlocal call_count
call_count += 1
if call_count < 2:
@ -157,7 +157,7 @@ class TestCircuitBreakerIntegration:
"""Test that circuit breaker opens after consecutive failures."""
call_count = 0
async def mock_get(*args: object, **kwargs: object) -> MockResponse:
def mock_get(*args: object, **kwargs: object) -> MockResponse:
nonlocal call_count
call_count += 1
return MockResponse(status=429)
@ -223,14 +223,14 @@ class TestMetricsTracking:
@pytest.mark.asyncio
async def test_metrics_tracked_on_rate_limit(self, config: ScraperConfig) -> None:
"""Test that rate limit errors are tracked in metrics."""
async def mock_get(*args: object, **kwargs: object) -> MockResponse:
def mock_get(*args: object, **kwargs: object) -> MockResponse:
return MockResponse(status=429)
mock_session = MagicMock()
mock_session.get = mock_get
with patch("rec.query.districts.get_districts", return_value={"Test": "LOC1"}):
with pytest.raises(RateLimitError):
with pytest.raises((RateLimitError, CircuitBreakerOpenError)):
with patch("tenacity.wait_exponential.__call__", return_value=0):
await probe_query(
session=mock_session,
@ -250,7 +250,7 @@ class TestMetricsTracking:
@pytest.mark.asyncio
async def test_metrics_tracked_on_success(self, config: ScraperConfig) -> None:
"""Test that successful requests are tracked in metrics."""
async def mock_get(*args: object, **kwargs: object) -> MockResponse:
def mock_get(*args: object, **kwargs: object) -> MockResponse:
return MockResponse(
status=200,
json_data={"totalAvailableResults": 10, "properties": []},