wrongmove/crawler/tests/unit/test_listing_fetcher.py
Viktor Barzin 150342bb9e
Refactor codebase following Clean Code principles and add 229 tests
- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
2026-02-07 20:19:57 +00:00

372 lines
13 KiB
Python

"""Unit tests for the listing fetcher service."""
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from models.listing import ListingType, QueryParameters
from rec.exceptions import CircuitBreakerOpenError, ThrottlingError
from services.listing_fetcher import (
NUM_WORKERS,
_fetch_subquery,
dump_listings,
dump_listings_full,
)
from services.query_splitter import SubQuery
def _make_subquery(**kwargs) -> SubQuery:
    """Build a SubQuery for tests; keyword arguments override the defaults."""
    field_values = {
        "district": "REGION^123",
        "min_bedrooms": 1,
        "max_bedrooms": 3,
        "min_price": 1000,
        "max_price": 3000,
        "estimated_results": 50,
        # Caller-supplied values win over the defaults listed above.
        **kwargs,
    }
    return SubQuery(**field_values)
class TestDumpListingsFull:
    """Tests for dump_listings_full."""

    async def test_returns_empty_list_when_no_new_listings(self) -> None:
        """An empty result from dump_listings yields an empty list."""
        repo = AsyncMock()
        repo.get_listings = AsyncMock(return_value=[])
        query = QueryParameters(listing_type=ListingType.RENT)

        with patch(
            "services.listing_fetcher.dump_listings",
            new=AsyncMock(return_value=[]),
        ):
            outcome = await dump_listings_full(query, repo)

        assert outcome == []

    async def test_returns_only_new_listings_from_db(self) -> None:
        """The IDs reported by dump_listings are looked up via the repository."""
        first = MagicMock(id=100)
        second = MagicMock(id=200)

        repo = AsyncMock()
        repo.get_listings = AsyncMock(return_value=[first, second])
        query = QueryParameters(listing_type=ListingType.RENT)

        with patch(
            "services.listing_fetcher.dump_listings",
            new=AsyncMock(return_value=[first, second]),
        ):
            outcome = await dump_listings_full(query, repo)

        # The repository must be queried exactly once, with the IDs of the
        # listings that the patched dump_listings returned.
        repo.get_listings.assert_awaited_once_with(only_ids=[100, 200])
        assert len(outcome) == 2
class TestFetchSubquery:
    """Tests for _fetch_subquery helper.

    Every test drives ``_fetch_subquery`` with a fresh queue, a mock session
    and a semaphore of 5; that shared arrange/act step lives in the private
    helpers below so each test only states what is specific to it.
    """

    @staticmethod
    def _paged_params() -> "QueryParameters":
        """Return rent query parameters with a page size of 25."""
        return QueryParameters(listing_type=ListingType.RENT, page_size=25)

    @staticmethod
    def _paging_config(**extra_attrs: int) -> MagicMock:
        """Return a mock config with the paging attributes the tests set.

        Args:
            **extra_attrs: Additional attributes to set on the mock config.
        """
        config = MagicMock()
        config.max_pages_per_query = 60
        config.request_delay_ms = 0
        config.configure_mock(**extra_attrs)
        return config

    @staticmethod
    async def _run_fetch(
        sq: "SubQuery",
        params: "QueryParameters",
        config: MagicMock,
        existing_ids: set,
    ) -> "tuple[int, asyncio.Queue[int | None]]":
        """Call _fetch_subquery with a fresh queue and return (ids_found, queue).

        Args:
            sq: Subquery to fetch.
            params: Query parameters forwarded to _fetch_subquery.
            config: Config object (a mock) forwarded to _fetch_subquery.
            existing_ids: IDs passed to _fetch_subquery as already known.

        Returns:
            The value returned by _fetch_subquery and the queue it was given.
        """
        queue: asyncio.Queue[int | None] = asyncio.Queue()
        ids_found = await _fetch_subquery(
            sq=sq,
            parameters=params,
            session=MagicMock(),
            config=config,
            semaphore=asyncio.Semaphore(5),
            existing_ids=existing_ids,
            queue=queue,
        )
        return ids_found, queue

    async def _assert_error_stops_paging(self, error: Exception) -> None:
        """Assert that nothing is found or queued when listing_query raises."""
        with patch(
            "services.listing_fetcher.listing_query",
            new_callable=AsyncMock,
            side_effect=error,
        ):
            ids_found, queue = await self._run_fetch(
                _make_subquery(estimated_results=100),
                self._paged_params(),
                self._paging_config(),
                set(),
            )
        assert ids_found == 0
        assert queue.empty()

    async def test_skips_subquery_with_zero_estimated_results(self) -> None:
        """Test that subqueries with 0 estimated results are skipped."""
        ids_found, queue = await self._run_fetch(
            _make_subquery(estimated_results=0),
            QueryParameters(listing_type=ListingType.RENT),
            MagicMock(),
            set(),
        )
        assert ids_found == 0
        assert queue.empty()

    async def test_skips_subquery_with_none_estimated_results(self) -> None:
        """Test that subqueries with None estimated results are skipped."""
        ids_found, queue = await self._run_fetch(
            _make_subquery(estimated_results=None),
            QueryParameters(listing_type=ListingType.RENT),
            MagicMock(),
            set(),
        )
        assert ids_found == 0
        assert queue.empty()

    async def test_enqueues_new_ids_only(self) -> None:
        """Test that only new (not existing) IDs are enqueued."""
        existing_ids: set[int] = {101, 103}
        api_result = {
            "properties": [
                {"identifier": 101},  # existing
                {"identifier": 102},  # new
                {"identifier": 103},  # existing
                {"identifier": 104},  # new
            ]
        }
        with patch(
            "services.listing_fetcher.listing_query",
            new_callable=AsyncMock,
            return_value=api_result,
        ):
            ids_found, queue = await self._run_fetch(
                _make_subquery(estimated_results=10),
                self._paged_params(),
                self._paging_config(max_concurrent_requests=5),
                existing_ids,
            )
        assert ids_found == 2
        # Drain the queue and verify that the queued IDs are the new ones.
        queued = []
        while not queue.empty():
            queued.append(queue.get_nowait())
        assert 102 in queued
        assert 104 in queued
        assert 101 not in queued
        assert 103 not in queued

    async def test_stops_on_circuit_breaker_error(self) -> None:
        """Test that CircuitBreakerOpenError breaks the page loop."""
        await self._assert_error_stops_paging(CircuitBreakerOpenError("open"))

    async def test_stops_on_throttling_error(self) -> None:
        """Test that ThrottlingError breaks the page loop."""
        await self._assert_error_stops_paging(ThrottlingError("throttled"))

    async def test_stops_on_generic_error(self) -> None:
        """Test that GENERIC_ERROR (past last page) stops pagination."""
        await self._assert_error_stops_paging(
            Exception("GENERIC_ERROR: no more results")
        )

    async def test_stops_on_unexpected_error(self) -> None:
        """Test that unexpected errors also stop pagination."""
        await self._assert_error_stops_paging(Exception("some network error"))

    async def test_stops_when_fewer_results_than_page_size(self) -> None:
        """Test that pagination stops when a page has fewer results than page_size."""
        # Two properties is fewer than the page size of 25.
        api_result = {
            "properties": [
                {"identifier": 1},
                {"identifier": 2},
            ]
        }
        with patch(
            "services.listing_fetcher.listing_query",
            new_callable=AsyncMock,
            return_value=api_result,
        ) as mock_query:
            ids_found, _queue = await self._run_fetch(
                _make_subquery(estimated_results=100),
                self._paged_params(),
                self._paging_config(),
                set(),
            )
        # Should have called listing_query exactly once (then stopped).
        assert mock_query.await_count == 1
        assert ids_found == 2
class TestDumpListings:
    """Tests for dump_listings."""

    async def test_circuit_breaker_returns_empty_list(self) -> None:
        """Test that CircuitBreakerOpenError returns empty list."""
        mock_repo = AsyncMock()
        params = QueryParameters(listing_type=ListingType.RENT)
        # create_session itself raises, so the error surfaces before any
        # session object exists.
        with patch(
            "services.listing_fetcher.create_session",
            side_effect=CircuitBreakerOpenError("open"),
        ):
            result = await dump_listings(params, mock_repo)
        assert result == []

    async def test_returns_processed_listings(self) -> None:
        """Test that dump_listings returns an empty list when there are no subqueries.

        The splitter is mocked to produce no subqueries, so nothing is
        fetched or processed and the result is expected to be empty.
        """
        mock_repo = AsyncMock()
        # Replaced with a plain MagicMock so the call returns the set directly
        # rather than a coroutine.
        mock_repo.get_listing_ids = MagicMock(return_value=set())
        params = QueryParameters(listing_type=ListingType.RENT)

        # Async context manager stand-in returned by the patched create_session.
        mock_session_cm = AsyncMock()
        mock_session = MagicMock()
        mock_session_cm.__aenter__ = AsyncMock(return_value=mock_session)
        mock_session_cm.__aexit__ = AsyncMock(return_value=False)

        with (
            patch(
                "services.listing_fetcher.create_session",
                return_value=mock_session_cm,
            ),
            patch(
                "services.listing_fetcher.QuerySplitter"
            ) as mock_splitter_cls,
            patch(
                "services.listing_fetcher._fetch_subquery",
                new_callable=AsyncMock,
                return_value=0,
            ),
        ):
            mock_splitter = mock_splitter_cls.return_value
            mock_splitter.split = AsyncMock(return_value=[])
            mock_splitter.calculate_total_estimated_results = MagicMock(
                return_value=0
            )
            result = await dump_listings(params, mock_repo)

        # With no subqueries, no listings are processed.
        assert result == []
class TestNumWorkers:
    """Tests for NUM_WORKERS constant."""

    def test_num_workers_is_positive(self) -> None:
        """NUM_WORKERS must be greater than zero."""
        worker_count = NUM_WORKERS
        assert worker_count > 0

    def test_num_workers_value(self) -> None:
        """NUM_WORKERS must equal the expected value of 20."""
        expected_workers = 20
        assert NUM_WORKERS == expected_workers