Refactor codebase following Clean Code principles and add 229 tests

- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
This commit is contained in:
Viktor Barzin 2026-02-07 20:19:57 +00:00
parent 7e05b3c971
commit 150342bb9e
No known key found for this signature in database
GPG key ID: 0EB088298288D958
48 changed files with 5029 additions and 990 deletions

View file

@ -0,0 +1,372 @@
"""Unit tests for the listing fetcher service."""
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from models.listing import ListingType, QueryParameters
from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
from services.listing_fetcher import (
NUM_WORKERS,
_fetch_subquery,
dump_listings,
dump_listings_full,
)
from services.query_splitter import SubQuery
def _make_subquery(**kwargs) -> SubQuery:
    """Build a SubQuery for tests, letting keyword arguments override defaults."""
    # Later keys win in a dict display, so caller-supplied values replace
    # the baseline ones listed first.
    fields = {
        "district": "REGION^123",
        "min_bedrooms": 1,
        "max_bedrooms": 3,
        "min_price": 1000,
        "max_price": 3000,
        "estimated_results": 50,
        **kwargs,
    }
    return SubQuery(**fields)
class TestDumpListingsFull:
    """Tests for dump_listings_full."""

    async def test_returns_empty_list_when_no_new_listings(self) -> None:
        """An empty dump_listings result makes dump_listings_full return []."""
        # Arrange: repository whose get_listings resolves to nothing.
        repo = AsyncMock()
        repo.get_listings = AsyncMock(return_value=[])
        query = QueryParameters(listing_type=ListingType.RENT)
        dump_patch = patch(
            "services.listing_fetcher.dump_listings",
            new_callable=AsyncMock,
            return_value=[],
        )

        # Act: run with dump_listings stubbed out.
        with dump_patch:
            fetched = await dump_listings_full(query, repo)

        # Assert
        assert fetched == []

    async def test_returns_only_new_listings_from_db(self) -> None:
        """IDs reported by dump_listings are looked up via repo.get_listings."""
        # Arrange: two stub listings carrying the IDs we expect to be queried.
        first = MagicMock(id=100)
        second = MagicMock(id=200)
        repo = AsyncMock()
        repo.get_listings = AsyncMock(return_value=[first, second])
        query = QueryParameters(listing_type=ListingType.RENT)
        dump_patch = patch(
            "services.listing_fetcher.dump_listings",
            new_callable=AsyncMock,
            return_value=[first, second],
        )

        # Act
        with dump_patch:
            fetched = await dump_listings_full(query, repo)

        # Assert: exactly one repository lookup, keyed by the dumped IDs.
        repo.get_listings.assert_awaited_once_with(only_ids=[100, 200])
        assert len(fetched) == 2
class TestFetchSubquery:
    """Tests for the _fetch_subquery helper.

    Every test awaits _fetch_subquery with a fresh MagicMock session and an
    asyncio.Semaphore(5). The private helpers below hold that shared
    boilerplate so each test states only what is specific to it. Their names
    do not start with ``test`` and are therefore not collected as tests.
    """

    # Patch target for the API call made by the code under test.
    _LISTING_QUERY = "services.listing_fetcher.listing_query"

    @staticmethod
    def _make_config(**attrs) -> MagicMock:
        """Return a mock config with max_pages_per_query=60, request_delay_ms=0.

        Args:
            **attrs: Additional attributes to set on the mock.
        """
        config = MagicMock()
        config.max_pages_per_query = 60
        config.request_delay_ms = 0
        for name, value in attrs.items():
            setattr(config, name, value)
        return config

    @staticmethod
    async def _fetch(sq, params, queue, config, existing_ids=None):
        """Await _fetch_subquery with the standard test collaborators.

        Args:
            sq: Subquery under test.
            params: Query parameters, forwarded as ``parameters``.
            queue: Queue handed to _fetch_subquery.
            config: Configuration object (a mock in every test here).
            existing_ids: IDs passed as ``existing_ids``; an empty set when
                omitted.

        Returns:
            The value returned by _fetch_subquery.
        """
        return await _fetch_subquery(
            sq=sq,
            parameters=params,
            session=MagicMock(),
            config=config,
            semaphore=asyncio.Semaphore(5),
            existing_ids=set() if existing_ids is None else existing_ids,
            queue=queue,
        )

    async def _assert_skipped(self, estimated_results) -> None:
        """Assert that a subquery with this estimate yields 0 IDs and no queue items.

        listing_query is deliberately left unpatched and the config is a bare
        MagicMock, matching the conditions of the skip tests.
        """
        sq = _make_subquery(estimated_results=estimated_results)
        params = QueryParameters(listing_type=ListingType.RENT)
        queue: asyncio.Queue[int | None] = asyncio.Queue()

        ids_found = await self._fetch(sq, params, queue, config=MagicMock())

        assert ids_found == 0
        assert queue.empty()

    async def _assert_error_yields_nothing(self, error: Exception) -> None:
        """Assert that listing_query raising ``error`` yields 0 IDs and an empty queue.

        NOTE(review): this checks only the returned count and the queue. It
        does not assert how many times listing_query was awaited, so on its
        own it does not prove that pagination stopped after the first failure.
        """
        sq = _make_subquery(estimated_results=100)
        params = QueryParameters(listing_type=ListingType.RENT, page_size=25)
        queue: asyncio.Queue[int | None] = asyncio.Queue()

        with patch(
            self._LISTING_QUERY,
            new_callable=AsyncMock,
            side_effect=error,
        ):
            ids_found = await self._fetch(
                sq, params, queue, config=self._make_config()
            )

        assert ids_found == 0
        assert queue.empty()

    async def test_skips_subquery_with_zero_estimated_results(self) -> None:
        """Test that subqueries with 0 estimated results are skipped."""
        await self._assert_skipped(0)

    async def test_skips_subquery_with_none_estimated_results(self) -> None:
        """Test that subqueries with None estimated results are skipped."""
        await self._assert_skipped(None)

    async def test_enqueues_new_ids_only(self) -> None:
        """Test that only new (not existing) IDs are enqueued."""
        sq = _make_subquery(estimated_results=10)
        params = QueryParameters(listing_type=ListingType.RENT, page_size=25)
        queue: asyncio.Queue[int | None] = asyncio.Queue()
        existing_ids: set[int] = {101, 103}
        api_result = {
            "properties": [
                {"identifier": 101},  # existing
                {"identifier": 102},  # new
                {"identifier": 103},  # existing
                {"identifier": 104},  # new
            ]
        }

        with patch(
            self._LISTING_QUERY,
            new_callable=AsyncMock,
            return_value=api_result,
        ):
            ids_found = await self._fetch(
                sq,
                params,
                queue,
                config=self._make_config(max_concurrent_requests=5),
                existing_ids=existing_ids,
            )

        assert ids_found == 2
        # Drain the queue so membership can be checked on a plain list.
        queued = []
        while not queue.empty():
            queued.append(queue.get_nowait())
        assert 102 in queued
        assert 104 in queued
        assert 101 not in queued
        assert 103 not in queued

    async def test_stops_on_circuit_breaker_error(self) -> None:
        """Test that a CircuitBreakerOpenError from listing_query yields no IDs."""
        await self._assert_error_yields_nothing(CircuitBreakerOpenError("open"))

    async def test_stops_on_throttling_error(self) -> None:
        """Test that a ThrottlingError from listing_query yields no IDs."""
        await self._assert_error_yields_nothing(ThrottlingError("throttled"))

    async def test_stops_on_generic_error(self) -> None:
        """Test that an Exception whose message starts with GENERIC_ERROR yields no IDs."""
        await self._assert_error_yields_nothing(
            Exception("GENERIC_ERROR: no more results")
        )

    async def test_stops_on_unexpected_error(self) -> None:
        """Test that an arbitrary Exception from listing_query yields no IDs."""
        await self._assert_error_yields_nothing(Exception("some network error"))

    async def test_stops_when_fewer_results_than_page_size(self) -> None:
        """Test that pagination stops when a page has fewer results than page_size."""
        sq = _make_subquery(estimated_results=100)
        params = QueryParameters(listing_type=ListingType.RENT, page_size=25)
        queue: asyncio.Queue[int | None] = asyncio.Queue()
        # Two properties against a page_size of 25: a short page.
        api_result = {
            "properties": [
                {"identifier": 1},
                {"identifier": 2},
            ]
        }

        with patch(
            self._LISTING_QUERY,
            new_callable=AsyncMock,
            return_value=api_result,
        ) as mock_query:
            ids_found = await self._fetch(
                sq, params, queue, config=self._make_config()
            )

        # Exactly one await means no second page was requested.
        assert mock_query.await_count == 1
        assert ids_found == 2
class TestDumpListings:
    """Tests for dump_listings."""

    async def test_circuit_breaker_returns_empty_list(self) -> None:
        """Test that dump_listings returns [] when create_session raises CircuitBreakerOpenError."""
        mock_repo = AsyncMock()
        params = QueryParameters(listing_type=ListingType.RENT)

        with patch("services.listing_fetcher.create_session") as mock_cs:
            mock_cs.side_effect = CircuitBreakerOpenError("open")
            result = await dump_listings(params, mock_repo)

        assert result == []

    async def test_returns_processed_listings(self) -> None:
        """Test that dump_listings returns [] when the splitter yields no subqueries.

        Despite the method name, no listings flow through the pipeline here:
        QuerySplitter.split is stubbed to return an empty list, so the only
        behavior checked is the empty result.
        """
        mock_repo = AsyncMock()
        # Plain MagicMock so the call returns the set directly, not a coroutine.
        mock_repo.get_listing_ids = MagicMock(return_value=set())
        params = QueryParameters(listing_type=ListingType.RENT)

        # Stand-in for the object returned by create_session(). It is given
        # async context-manager hooks; presumably dump_listings enters it with
        # ``async with`` — confirm against the implementation.
        mock_session_cm = AsyncMock()
        mock_session = MagicMock()
        mock_session_cm.__aenter__ = AsyncMock(return_value=mock_session)
        # A falsy __aexit__ result means exceptions are not suppressed.
        mock_session_cm.__aexit__ = AsyncMock(return_value=False)

        with (
            patch(
                "services.listing_fetcher.create_session",
                return_value=mock_session_cm,
            ),
            patch(
                "services.listing_fetcher.QuerySplitter"
            ) as mock_splitter_cls,
            patch(
                "services.listing_fetcher._fetch_subquery",
                new_callable=AsyncMock,
                return_value=0,
            ),
        ):
            mock_splitter = mock_splitter_cls.return_value
            mock_splitter.split = AsyncMock(return_value=[])
            mock_splitter.calculate_total_estimated_results = MagicMock(
                return_value=0
            )
            result = await dump_listings(params, mock_repo)

        # With no subqueries, no listings are processed
        assert result == []
class TestNumWorkers:
    """Tests for the NUM_WORKERS constant."""

    def test_num_workers_is_positive(self) -> None:
        """NUM_WORKERS must be greater than zero."""
        lower_bound = 0
        assert NUM_WORKERS > lower_bound

    def test_num_workers_value(self) -> None:
        """NUM_WORKERS must equal the pinned value of 20."""
        expected = 20
        assert NUM_WORKERS == expected