- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
- Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
- Fix bugs: isinstance → equality check in repository, verify_exp for OIDC tokens
- Consolidate duplicate filter methods in listing_repository
- Move hardcoded config to env vars with backward-compatible defaults
- Simplify CLI decorator to auto-build QueryParameters
- Add deprecation docstring to data_access.py
- Test count: 158 → 387 (all passing)
372 lines
13 KiB
Python
372 lines
13 KiB
Python
"""Unit tests for the listing fetcher service."""
|
|
import asyncio
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from models.listing import ListingType, QueryParameters
|
|
from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
|
|
from services.listing_fetcher import (
|
|
NUM_WORKERS,
|
|
_fetch_subquery,
|
|
dump_listings,
|
|
dump_listings_full,
|
|
)
|
|
from services.query_splitter import SubQuery
|
|
|
|
|
|
def _make_subquery(**kwargs) -> SubQuery:
    """Build a ``SubQuery`` for tests; keyword arguments override the defaults."""
    baseline = {
        "district": "REGION^123",
        "min_bedrooms": 1,
        "max_bedrooms": 3,
        "min_price": 1000,
        "max_price": 3000,
        "estimated_results": 50,
    }
    # Later entries win, so caller-supplied values replace the baseline ones.
    return SubQuery(**{**baseline, **kwargs})
|
|
|
|
|
|
class TestDumpListingsFull:
    """Exercise dump_listings_full with dump_listings patched out."""

    async def test_returns_empty_list_when_no_new_listings(self) -> None:
        """An empty result from dump_listings produces an empty list."""
        repo = AsyncMock()
        repo.get_listings = AsyncMock(return_value=[])
        query = QueryParameters(listing_type=ListingType.RENT)

        with patch(
            "services.listing_fetcher.dump_listings",
            new_callable=AsyncMock,
            return_value=[],
        ):
            outcome = await dump_listings_full(query, repo)

        assert outcome == []

    async def test_returns_only_new_listings_from_db(self) -> None:
        """The IDs of the dumped listings are passed to repository.get_listings."""
        fresh = []
        for listing_id in (100, 200):
            listing = MagicMock()
            listing.id = listing_id
            fresh.append(listing)

        repo = AsyncMock()
        # Separate list objects for the two mocks, as each had its own literal.
        repo.get_listings = AsyncMock(return_value=list(fresh))
        query = QueryParameters(listing_type=ListingType.RENT)

        with patch(
            "services.listing_fetcher.dump_listings",
            new_callable=AsyncMock,
            return_value=list(fresh),
        ):
            outcome = await dump_listings_full(query, repo)

        # The repository lookup must be keyed on exactly the dumped IDs.
        repo.get_listings.assert_awaited_once_with(only_ids=[100, 200])
        assert len(outcome) == 2
|
|
|
|
|
|
class TestFetchSubquery:
    """Exercise the _fetch_subquery helper, mostly against a patched listing_query."""

    @staticmethod
    def _paging_config() -> MagicMock:
        """Return a mock config carrying the paging attributes these tests set."""
        config = MagicMock()
        config.max_pages_per_query = 60
        config.request_delay_ms = 0
        return config

    @staticmethod
    async def _run(sq, parameters, config, queue, existing_ids=None):
        """Await _fetch_subquery with a fresh mock session and a 5-slot semaphore."""
        return await _fetch_subquery(
            sq=sq,
            parameters=parameters,
            session=MagicMock(),
            config=config,
            semaphore=asyncio.Semaphore(5),
            existing_ids=set() if existing_ids is None else existing_ids,
            queue=queue,
        )

    async def _assert_skipped(self, estimated_results) -> None:
        """Check that a subquery with this estimate finds and queues nothing."""
        subquery = _make_subquery(estimated_results=estimated_results)
        query = QueryParameters(listing_type=ListingType.RENT)
        queue: asyncio.Queue[int | None] = asyncio.Queue()

        found = await self._run(subquery, query, MagicMock(), queue)

        assert found == 0
        assert queue.empty()

    async def _assert_stops_on(self, failure: Exception) -> None:
        """Make listing_query raise *failure*; check nothing is found or queued."""
        subquery = _make_subquery(estimated_results=100)
        query = QueryParameters(listing_type=ListingType.RENT, page_size=25)
        queue: asyncio.Queue[int | None] = asyncio.Queue()
        config = self._paging_config()

        with patch(
            "services.listing_fetcher.listing_query",
            new_callable=AsyncMock,
            side_effect=failure,
        ):
            found = await self._run(subquery, query, config, queue)

        assert found == 0
        assert queue.empty()

    async def test_skips_subquery_with_zero_estimated_results(self) -> None:
        """A subquery estimating 0 results is skipped."""
        await self._assert_skipped(0)

    async def test_skips_subquery_with_none_estimated_results(self) -> None:
        """A subquery whose estimate is None is skipped."""
        await self._assert_skipped(None)

    async def test_enqueues_new_ids_only(self) -> None:
        """Only identifiers absent from existing_ids end up on the queue."""
        subquery = _make_subquery(estimated_results=10)
        query = QueryParameters(listing_type=ListingType.RENT, page_size=25)
        queue: asyncio.Queue[int | None] = asyncio.Queue()
        known_ids: set[int] = {101, 103}

        config = self._paging_config()
        config.max_concurrent_requests = 5

        # 101 and 103 are already known; 102 and 104 are new.
        page = {
            "properties": [
                {"identifier": 101},
                {"identifier": 102},
                {"identifier": 103},
                {"identifier": 104},
            ]
        }

        with patch(
            "services.listing_fetcher.listing_query",
            new_callable=AsyncMock,
            return_value=page,
        ):
            found = await self._run(subquery, query, config, queue, known_ids)

        assert found == 2

        # Drain the queue and check membership of each identifier.
        queued = [queue.get_nowait() for _ in range(queue.qsize())]
        for new_id in (102, 104):
            assert new_id in queued
        for known_id in (101, 103):
            assert known_id not in queued

    async def test_stops_on_circuit_breaker_error(self) -> None:
        """CircuitBreakerOpenError from listing_query yields no IDs."""
        await self._assert_stops_on(CircuitBreakerOpenError("open"))

    async def test_stops_on_throttling_error(self) -> None:
        """ThrottlingError from listing_query yields no IDs."""
        await self._assert_stops_on(ThrottlingError("throttled"))

    async def test_stops_on_generic_error(self) -> None:
        """A GENERIC_ERROR exception (past the last page) yields no IDs."""
        await self._assert_stops_on(Exception("GENERIC_ERROR: no more results"))

    async def test_stops_on_unexpected_error(self) -> None:
        """Any other exception from listing_query also yields no IDs."""
        await self._assert_stops_on(Exception("some network error"))

    async def test_stops_when_fewer_results_than_page_size(self) -> None:
        """A page shorter than page_size ends pagination after one query."""
        subquery = _make_subquery(estimated_results=100)
        query = QueryParameters(listing_type=ListingType.RENT, page_size=25)
        queue: asyncio.Queue[int | None] = asyncio.Queue()
        config = self._paging_config()

        # Two properties, well under the page_size of 25.
        short_page = {"properties": [{"identifier": 1}, {"identifier": 2}]}

        with patch(
            "services.listing_fetcher.listing_query",
            new_callable=AsyncMock,
            return_value=short_page,
        ) as mock_query:
            found = await self._run(subquery, query, config, queue)

        # listing_query must have been awaited exactly once before stopping.
        assert mock_query.await_count == 1
        assert found == 2
|
|
|
|
|
|
class TestDumpListings:
    """Tests for dump_listings."""

    async def test_circuit_breaker_returns_empty_list(self) -> None:
        """A CircuitBreakerOpenError raised by create_session yields an empty list."""
        mock_repo = AsyncMock()
        params = QueryParameters(listing_type=ListingType.RENT)

        with patch(
            "services.listing_fetcher.create_session",
            side_effect=CircuitBreakerOpenError("open"),
        ):
            result = await dump_listings(params, mock_repo)

        assert result == []

    async def test_returns_processed_listings(self) -> None:
        """When the splitter produces no subqueries, dump_listings returns an empty list."""
        mock_repo = AsyncMock()
        mock_repo.get_listing_ids = MagicMock(return_value=set())
        params = QueryParameters(listing_type=ListingType.RENT)

        # Stand-in for the async context manager returned by create_session.
        mock_session_cm = AsyncMock()
        mock_session = MagicMock()
        mock_session_cm.__aenter__ = AsyncMock(return_value=mock_session)
        mock_session_cm.__aexit__ = AsyncMock(return_value=False)

        with (
            patch(
                "services.listing_fetcher.create_session",
                return_value=mock_session_cm,
            ),
            patch("services.listing_fetcher.QuerySplitter") as mock_splitter_cls,
            patch(
                "services.listing_fetcher._fetch_subquery",
                new_callable=AsyncMock,
                return_value=0,
            ),
        ):
            # The splitter instance reports no subqueries and no estimated results.
            mock_splitter = mock_splitter_cls.return_value
            mock_splitter.split = AsyncMock(return_value=[])
            mock_splitter.calculate_total_estimated_results = MagicMock(
                return_value=0
            )

            result = await dump_listings(params, mock_repo)

        # With no subqueries, no listings are processed.
        assert result == []
|
|
|
|
|
|
class TestNumWorkers:
    """Checks on the NUM_WORKERS constant."""

    def test_num_workers_is_positive(self) -> None:
        """NUM_WORKERS is greater than zero."""
        assert NUM_WORKERS > 0

    def test_num_workers_value(self) -> None:
        """NUM_WORKERS equals the value this test pins."""
        pinned_value = 20
        assert NUM_WORKERS == pinned_value
|