"""Unit tests for the listing fetcher service.""" import asyncio from unittest.mock import AsyncMock, MagicMock, patch import pytest from models.listing import ListingType, QueryParameters from rec.exceptions import CircuitBreakerOpenError, ThrottlingError from services.listing_fetcher import ( NUM_WORKERS, _fetch_subquery, dump_listings, dump_listings_full, ) from services.query_splitter import SubQuery def _make_subquery(**kwargs) -> SubQuery: """Create a SubQuery with sensible defaults for testing.""" defaults = dict( district="REGION^123", min_bedrooms=1, max_bedrooms=3, min_price=1000, max_price=3000, estimated_results=50, ) defaults.update(kwargs) return SubQuery(**defaults) class TestDumpListingsFull: """Tests for dump_listings_full.""" async def test_returns_empty_list_when_no_new_listings(self) -> None: """Test that empty results from dump_listings returns empty list.""" with patch( "services.listing_fetcher.dump_listings", new_callable=AsyncMock, return_value=[], ): mock_repo = AsyncMock() mock_repo.get_listings = AsyncMock(return_value=[]) params = QueryParameters(listing_type=ListingType.RENT) result = await dump_listings_full(params, mock_repo) assert result == [] async def test_returns_only_new_listings_from_db(self) -> None: """Test that dump_listings_full fetches new listings by ID from the repository.""" mock_listing_1 = MagicMock() mock_listing_1.id = 100 mock_listing_2 = MagicMock() mock_listing_2.id = 200 with patch( "services.listing_fetcher.dump_listings", new_callable=AsyncMock, return_value=[mock_listing_1, mock_listing_2], ): mock_repo = AsyncMock() mock_repo.get_listings = AsyncMock( return_value=[mock_listing_1, mock_listing_2] ) params = QueryParameters(listing_type=ListingType.RENT) result = await dump_listings_full(params, mock_repo) # Verify get_listings was called with the correct IDs mock_repo.get_listings.assert_awaited_once_with( only_ids=[100, 200] ) assert len(result) == 2 class TestFetchSubquery: """Tests for _fetch_subquery helper.""" async def test_skips_subquery_with_zero_estimated_results(self) -> None: """Test that subqueries with 0 estimated results are skipped.""" sq = _make_subquery(estimated_results=0) params = QueryParameters(listing_type=ListingType.RENT) queue: asyncio.Queue[int | None] = asyncio.Queue() ids_found = await _fetch_subquery( sq=sq, parameters=params, session=MagicMock(), config=MagicMock(), semaphore=asyncio.Semaphore(5), existing_ids=set(), queue=queue, ) assert ids_found == 0 assert queue.empty() async def test_skips_subquery_with_none_estimated_results(self) -> None: """Test that subqueries with None estimated results are skipped.""" sq = _make_subquery(estimated_results=None) params = QueryParameters(listing_type=ListingType.RENT) queue: asyncio.Queue[int | None] = asyncio.Queue() ids_found = await _fetch_subquery( sq=sq, parameters=params, session=MagicMock(), config=MagicMock(), semaphore=asyncio.Semaphore(5), existing_ids=set(), queue=queue, ) assert ids_found == 0 assert queue.empty() async def test_enqueues_new_ids_only(self) -> None: """Test that only new (not existing) IDs are enqueued.""" sq = _make_subquery(estimated_results=10) params = QueryParameters(listing_type=ListingType.RENT, page_size=25) queue: asyncio.Queue[int | None] = asyncio.Queue() existing_ids: set[int] = {101, 103} mock_config = MagicMock() mock_config.max_pages_per_query = 60 mock_config.request_delay_ms = 0 mock_config.max_concurrent_requests = 5 api_result = { "properties": [ {"identifier": 101}, # existing {"identifier": 102}, # new {"identifier": 103}, # existing {"identifier": 104}, # new ] } with patch( "services.listing_fetcher.listing_query", new_callable=AsyncMock, return_value=api_result, ): ids_found = await _fetch_subquery( sq=sq, parameters=params, session=MagicMock(), config=mock_config, semaphore=asyncio.Semaphore(5), existing_ids=existing_ids, queue=queue, ) assert ids_found == 2 # Verify that queued IDs are the new ones queued = [] while not queue.empty(): queued.append(queue.get_nowait()) assert 102 in queued assert 104 in queued assert 101 not in queued assert 103 not in queued async def test_stops_on_circuit_breaker_error(self) -> None: """Test that CircuitBreakerOpenError breaks the page loop.""" sq = _make_subquery(estimated_results=100) params = QueryParameters(listing_type=ListingType.RENT, page_size=25) queue: asyncio.Queue[int | None] = asyncio.Queue() mock_config = MagicMock() mock_config.max_pages_per_query = 60 mock_config.request_delay_ms = 0 with patch( "services.listing_fetcher.listing_query", new_callable=AsyncMock, side_effect=CircuitBreakerOpenError("open"), ): ids_found = await _fetch_subquery( sq=sq, parameters=params, session=MagicMock(), config=mock_config, semaphore=asyncio.Semaphore(5), existing_ids=set(), queue=queue, ) assert ids_found == 0 assert queue.empty() async def test_stops_on_throttling_error(self) -> None: """Test that ThrottlingError breaks the page loop.""" sq = _make_subquery(estimated_results=100) params = QueryParameters(listing_type=ListingType.RENT, page_size=25) queue: asyncio.Queue[int | None] = asyncio.Queue() mock_config = MagicMock() mock_config.max_pages_per_query = 60 mock_config.request_delay_ms = 0 with patch( "services.listing_fetcher.listing_query", new_callable=AsyncMock, side_effect=ThrottlingError("throttled"), ): ids_found = await _fetch_subquery( sq=sq, parameters=params, session=MagicMock(), config=mock_config, semaphore=asyncio.Semaphore(5), existing_ids=set(), queue=queue, ) assert ids_found == 0 assert queue.empty() async def test_stops_on_generic_error(self) -> None: """Test that GENERIC_ERROR (past last page) stops pagination.""" sq = _make_subquery(estimated_results=100) params = QueryParameters(listing_type=ListingType.RENT, page_size=25) queue: asyncio.Queue[int | None] = asyncio.Queue() mock_config = MagicMock() mock_config.max_pages_per_query = 60 mock_config.request_delay_ms = 0 with patch( "services.listing_fetcher.listing_query", new_callable=AsyncMock, side_effect=Exception("GENERIC_ERROR: no more results"), ): ids_found = await _fetch_subquery( sq=sq, parameters=params, session=MagicMock(), config=mock_config, semaphore=asyncio.Semaphore(5), existing_ids=set(), queue=queue, ) assert ids_found == 0 assert queue.empty() async def test_stops_on_unexpected_error(self) -> None: """Test that unexpected errors also stop pagination.""" sq = _make_subquery(estimated_results=100) params = QueryParameters(listing_type=ListingType.RENT, page_size=25) queue: asyncio.Queue[int | None] = asyncio.Queue() mock_config = MagicMock() mock_config.max_pages_per_query = 60 mock_config.request_delay_ms = 0 with patch( "services.listing_fetcher.listing_query", new_callable=AsyncMock, side_effect=Exception("some network error"), ): ids_found = await _fetch_subquery( sq=sq, parameters=params, session=MagicMock(), config=mock_config, semaphore=asyncio.Semaphore(5), existing_ids=set(), queue=queue, ) assert ids_found == 0 assert queue.empty() async def test_stops_when_fewer_results_than_page_size(self) -> None: """Test that pagination stops when a page has fewer results than page_size.""" sq = _make_subquery(estimated_results=100) params = QueryParameters(listing_type=ListingType.RENT, page_size=25) queue: asyncio.Queue[int | None] = asyncio.Queue() mock_config = MagicMock() mock_config.max_pages_per_query = 60 mock_config.request_delay_ms = 0 # Return fewer properties than page_size api_result = { "properties": [ {"identifier": 1}, {"identifier": 2}, ] } with patch( "services.listing_fetcher.listing_query", new_callable=AsyncMock, return_value=api_result, ) as mock_query: ids_found = await _fetch_subquery( sq=sq, parameters=params, session=MagicMock(), config=mock_config, semaphore=asyncio.Semaphore(5), existing_ids=set(), queue=queue, ) # Should have called listing_query exactly once (then stopped) assert mock_query.await_count == 1 assert ids_found == 2 class TestDumpListings: """Tests for dump_listings.""" async def test_circuit_breaker_returns_empty_list(self) -> None: """Test that CircuitBreakerOpenError returns empty list.""" mock_repo = AsyncMock() params = QueryParameters(listing_type=ListingType.RENT) with patch("services.listing_fetcher.create_session") as mock_cs: mock_cs.side_effect = CircuitBreakerOpenError("open") result = await dump_listings(params, mock_repo) assert result == [] async def test_returns_processed_listings(self) -> None: """Test that dump_listings returns processed listings from the pipeline.""" mock_repo = AsyncMock() mock_repo.get_listing_ids = MagicMock(return_value=set()) params = QueryParameters(listing_type=ListingType.RENT) mock_listing = MagicMock() mock_listing.id = 42 mock_session_cm = AsyncMock() mock_session = MagicMock() mock_session_cm.__aenter__ = AsyncMock(return_value=mock_session) mock_session_cm.__aexit__ = AsyncMock(return_value=False) with ( patch( "services.listing_fetcher.create_session", return_value=mock_session_cm, ), patch( "services.listing_fetcher.QuerySplitter" ) as mock_splitter_cls, patch( "services.listing_fetcher._fetch_subquery", new_callable=AsyncMock, return_value=0, ), ): mock_splitter = mock_splitter_cls.return_value mock_splitter.split = AsyncMock(return_value=[]) mock_splitter.calculate_total_estimated_results = MagicMock( return_value=0 ) result = await dump_listings(params, mock_repo) # With no subqueries, no listings are processed assert result == [] class TestNumWorkers: """Tests for NUM_WORKERS constant.""" def test_num_workers_is_positive(self) -> None: """Test that NUM_WORKERS is a positive integer.""" assert NUM_WORKERS > 0 def test_num_workers_value(self) -> None: """Test that NUM_WORKERS has the expected value.""" assert NUM_WORKERS == 20