"""Unit tests for the QuerySplitter service, ScraperConfig and SubQuery."""

from unittest.mock import AsyncMock, patch

import pytest

from config.scraper_config import ScraperConfig
from models.listing import ListingType, QueryParameters
from services.query_splitter import QuerySplitter, SubQuery

# Dotted paths of the names these tests replace inside the splitter module.
_PROBE_QUERY = "services.query_splitter.probe_query"
_GET_DISTRICTS = "services.query_splitter.get_districts"


def _kings_cross_two_bed(max_price: int) -> SubQuery:
    """Build a 2-bedroom Kings Cross SubQuery priced from 1000 to *max_price*."""
    return SubQuery(
        district="Kings Cross",
        min_bedrooms=2,
        max_bedrooms=2,
        min_price=1000,
        max_price=max_price,
    )


def _probe_payloads(*counts: int) -> list:
    """Wrap each count in the dict shape the mocked probe_query returns."""
    return [{"totalAvailableResults": count} for count in counts]


class TestScraperConfig:
    """Checks for ScraperConfig defaults and environment loading."""

    def test_default_values(self) -> None:
        """A ScraperConfig built without arguments carries the defaults."""
        config = ScraperConfig()

        expected_defaults = {
            "max_concurrent_requests": 5,
            "request_delay_ms": 100,
            "result_cap": 1500,
            "split_threshold": 1200,
            "min_price_band": 100,
            "max_pages_per_query": 60,
        }
        for field_name, expected in expected_defaults.items():
            assert getattr(config, field_name) == expected, field_name
        assert config.proxy_url is None

    def test_from_env(self) -> None:
        """from_env() picks up every RIGHTMOVE_* override set here."""
        env_overrides = {
            "RIGHTMOVE_MAX_CONCURRENT": "10",
            "RIGHTMOVE_REQUEST_DELAY_MS": "200",
            "RIGHTMOVE_SPLIT_THRESHOLD": "1000",
            "RIGHTMOVE_MIN_PRICE_BAND": "50",
            "RIGHTMOVE_MAX_PAGES": "30",
            "RIGHTMOVE_PROXY_URL": "socks5://localhost:9050",
        }
        expected_fields = {
            "max_concurrent_requests": 10,
            "request_delay_ms": 200,
            "split_threshold": 1000,
            "min_price_band": 50,
            "max_pages_per_query": 30,
            "proxy_url": "socks5://localhost:9050",
        }

        with patch.dict("os.environ", env_overrides):
            config = ScraperConfig.from_env()

            for field_name, expected in expected_fields.items():
                assert getattr(config, field_name) == expected, field_name

    def test_from_env_empty_proxy(self) -> None:
        """An empty RIGHTMOVE_PROXY_URL is loaded as proxy_url None."""
        # clear=False leaves every other environment variable in place.
        with patch.dict("os.environ", {"RIGHTMOVE_PROXY_URL": ""}, clear=False):
            config = ScraperConfig.from_env()

            assert config.proxy_url is None


class TestSubQuery:
    """Checks for the SubQuery dataclass."""

    def test_price_range_calculation(self) -> None:
        """price_range is 1000 for a 1000-2000 subquery."""
        subquery = _kings_cross_two_bed(max_price=2000)

        assert subquery.price_range == 1000


class TestQuerySplitter:
    """Checks for the QuerySplitter class."""

    @pytest.fixture
    def config(self) -> ScraperConfig:
        """Provide an explicit configuration for the splitter under test."""
        return ScraperConfig(
            max_concurrent_requests=5,
            request_delay_ms=10,  # Shorter delay keeps the tests fast
            result_cap=1500,
            split_threshold=1200,
            min_price_band=100,
            max_pages_per_query=60,
            proxy_url=None,
        )

    @pytest.fixture
    def splitter(self, config: ScraperConfig) -> QuerySplitter:
        """Provide a QuerySplitter built from the ``config`` fixture."""
        return QuerySplitter(config)

    @pytest.fixture
    def parameters(self) -> QueryParameters:
        """Provide rental query parameters: 2-3 bedrooms, 1000-5000, two districts."""
        return QueryParameters(
            listing_type=ListingType.RENT,
            min_bedrooms=2,
            max_bedrooms=3,
            min_price=1000,
            max_price=5000,
            district_names={"Kings Cross", "Angel"},
        )

    def test_create_initial_subqueries(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """Initial subqueries cover each district and bedroom count."""
        districts = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}

        subqueries = splitter.create_initial_subqueries(parameters, districts)

        # 2 districts x 2 bedroom counts (2, 3) = 4 subqueries
        assert len(subqueries) == 4

        # Inspect the first subquery field by field.
        first = subqueries[0]
        assert (
            first.district,
            first.min_bedrooms,
            first.max_bedrooms,
            first.min_price,
            first.max_price,
        ) == ("Kings Cross", 2, 2, 1000, 5000)

    def test_split_by_price(self, splitter: QuerySplitter) -> None:
        """split_by_price halves the price range at its midpoint."""
        halves = splitter.split_by_price(_kings_cross_two_bed(max_price=5000))

        assert len(halves) == 2
        lower, upper = halves
        assert (lower.min_price, lower.max_price) == (1000, 3000)  # up to midpoint
        assert (upper.min_price, upper.max_price) == (3000, 5000)

        # District and bedroom range carry over to both halves unchanged.
        for half in halves:
            assert (half.district, half.min_bedrooms, half.max_bedrooms) == (
                "Kings Cross",
                2,
                2,
            )

    @pytest.mark.asyncio
    async def test_probe_result_count(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """probe_result_count returns the count reported by probe_query."""
        subquery = _kings_cross_two_bed(max_price=5000)
        session = AsyncMock()

        # Replace probe_query so no real request is made.
        with patch(_PROBE_QUERY, return_value={"totalAvailableResults": 800}) as probe:
            count = await splitter.probe_result_count(subquery, session, parameters)

        assert count == 800
        probe.assert_called_once()

    @pytest.mark.asyncio
    async def test_probe_result_count_handles_error(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """probe_result_count yields 0 when probe_query raises."""
        subquery = _kings_cross_two_bed(max_price=5000)
        session = AsyncMock()

        with patch(_PROBE_QUERY, side_effect=Exception("API error")):
            count = await splitter.probe_result_count(subquery, session, parameters)

        # An error is reported as zero results.
        assert count == 0

    @pytest.mark.asyncio
    async def test_adaptive_split_no_split_needed(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """adaptive_split stops once both halves are under the threshold."""
        subquery = _kings_cross_two_bed(max_price=2000)
        session = AsyncMock()
        semaphore = AsyncMock()

        # First half reports 600 results, second half 500.
        with patch(_PROBE_QUERY, side_effect=_probe_payloads(600, 500)):
            result = await splitter.adaptive_split(
                subquery, session, parameters, semaphore
            )

        # Both halves are under the threshold (1200), so two subqueries return.
        assert len(result) == 2
        assert [part.estimated_results for part in result] == [600, 500]

    @pytest.mark.asyncio
    async def test_adaptive_split_recursive_splitting(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """adaptive_split splits again when a half is over the threshold."""
        subquery = _kings_cross_two_bed(max_price=5000)
        session = AsyncMock()
        semaphore = AsyncMock()

        probe_counts = [
            1300,  # First half (1000-3000) - needs more splitting
            800,  # Second half (3000-5000) - OK
            700,  # First quarter (1000-2000) - OK
            600,  # Second quarter (2000-3000) - OK
        ]
        with patch(_PROBE_QUERY, side_effect=_probe_payloads(*probe_counts)):
            result = await splitter.adaptive_split(
                subquery, session, parameters, semaphore
            )

        # Expect 3 subqueries: 1000-2000 (700), 2000-3000 (600), 3000-5000 (800)
        assert len(result) == 3

    @pytest.mark.asyncio
    async def test_adaptive_split_respects_min_price_band(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """adaptive_split returns the subquery as-is below min_price_band."""
        narrow = SubQuery(
            district="Kings Cross",
            min_bedrooms=2,
            max_bedrooms=2,
            min_price=1000,
            max_price=1050,  # Range of 50, below the min_price_band of 100
            estimated_results=1500,  # Over threshold, yet too narrow to split
        )
        session = AsyncMock()
        semaphore = AsyncMock()

        result = await splitter.adaptive_split(narrow, session, parameters, semaphore)

        # No split below min_price_band: the original range comes back.
        assert len(result) == 1
        assert (result[0].min_price, result[0].max_price) == (1000, 1050)

    def test_calculate_total_estimated_results(
        self, splitter: QuerySplitter
    ) -> None:
        """The total sums the estimates and counts a None estimate as 0."""
        # (district, bedrooms, estimated_results); None means "not probed".
        specs = [
            ("Kings Cross", 2, 500),
            ("Kings Cross", 3, 300),
            ("Angel", 2, None),
        ]
        subqueries = [
            SubQuery(
                district=district,
                min_bedrooms=bedrooms,
                max_bedrooms=bedrooms,
                min_price=1000,
                max_price=2000,
                estimated_results=estimate,
            )
            for district, bedrooms, estimate in specs
        ]

        total = splitter.calculate_total_estimated_results(subqueries)

        assert total == 800  # 500 + 300 + 0

    @pytest.mark.asyncio
    async def test_split_integration(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """Run the full split workflow against mocked districts and probes."""
        session = AsyncMock()
        station_ids = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}

        # One probe per initial subquery (2 districts x 2 bedroom counts = 4),
        # followed by the two halves of the subquery that needs splitting.
        probe_counts = [
            500,  # KC 2BR - OK
            1300,  # KC 3BR - needs split
            600,  # Angel 2BR - OK
            800,  # Angel 3BR - OK
            700,  # KC 3BR first half
            600,  # KC 3BR second half
        ]
        districts_patch = patch(_GET_DISTRICTS, return_value=station_ids)
        probe_patch = patch(_PROBE_QUERY, side_effect=_probe_payloads(*probe_counts))

        with districts_patch, probe_patch:
            result = await splitter.split(parameters, session)

        # 5 subqueries in total: KC 2BR (500), KC 3BR split in two (700 + 600),
        # Angel 2BR (600), Angel 3BR (800).
        assert len(result) == 5

        total = splitter.calculate_total_estimated_results(result)
        assert total == 3200  # 500 + 700 + 600 + 600 + 800

    @pytest.mark.asyncio
    async def test_split_with_on_progress_callback(
        self, splitter: QuerySplitter, parameters: QueryParameters
    ) -> None:
        """split() reports progress through the on_progress callback."""
        session = AsyncMock()
        station_ids = {"Kings Cross": "STATION^5168", "Angel": "STATION^1234"}
        progress_calls: list[tuple[str, str]] = []

        def on_progress(phase: str, message: str) -> None:
            progress_calls.append((phase, message))

        districts_patch = patch(_GET_DISTRICTS, return_value=station_ids)
        probe_patch = patch(_PROBE_QUERY, return_value={"totalAvailableResults": 500})

        with districts_patch, probe_patch:
            await splitter.split(parameters, session, on_progress)

        # At least two progress updates, covering both phases below.
        assert len(progress_calls) >= 2
        seen_phases = {phase for phase, _ in progress_calls}
        assert "splitting" in seen_phases
        assert "splitting_complete" in seen_phases