"""Integration tests for ListingProcessor and processing steps."""

from datetime import datetime
from unittest.mock import AsyncMock

import pytest
from sqlalchemy import Engine

from listing_processor import (
    DetectFloorplanStep,
    FetchImagesStep,
    FetchListingDetailsStep,
    ListingProcessor,
)
from models.listing import ListingType
from repositories.listing_repository import ListingRepository


# ---------- Processor structure tests ----------


def test_processor_has_three_steps(listing_repository: ListingRepository) -> None:
    """A freshly constructed processor exposes exactly three processing steps."""
    processor = ListingProcessor(listing_repository)

    assert len(processor.process_steps) == 3


def test_step_order(listing_repository: ListingRepository) -> None:
    """The steps are ordered: listing details, then images, then floorplan detection."""
    processor = ListingProcessor(listing_repository)

    step_types = [type(step) for step in processor.process_steps]

    assert step_types == [FetchListingDetailsStep, FetchImagesStep, DetectFloorplanStep]


# ---------- Processing flow ----------


@pytest.mark.asyncio
async def test_process_calls_steps_in_order(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """process_listing invokes every step's ``process`` in declaration order."""
    # Seed a listing so mark_seen doesn't fail
    listing = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([listing])

    processor = ListingProcessor(listing_repository)
    call_order: list[str] = []

    def recording_side_effect(step_name: str):
        """Build a ``process`` stand-in that logs *step_name* and returns the listing."""

        def _side_effect(lid):
            call_order.append(step_name)
            return listing

        return _side_effect

    for step in processor.process_steps:
        step.needs_processing = AsyncMock(return_value=True)
        step.process = AsyncMock(side_effect=recording_side_effect(type(step).__name__))

    result = await processor.process_listing(42)

    assert result is not None
    assert call_order == [
        "FetchListingDetailsStep",
        "FetchImagesStep",
        "DetectFloorplanStep",
    ]


@pytest.mark.asyncio
async def test_step_failure_stops_pipeline(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """When the first step raises, the result is None and later steps never run."""
    listing = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([listing])

    processor = ListingProcessor(listing_repository)
    failing_step, *later_steps = processor.process_steps

    failing_step.needs_processing = AsyncMock(return_value=True)
    failing_step.process = AsyncMock(side_effect=RuntimeError("boom"))
    for step in later_steps:
        step.needs_processing = AsyncMock(return_value=True)
        step.process = AsyncMock()

    result = await processor.process_listing(42)

    assert result is None
    # Second and third steps should not have been called
    for step in later_steps:
        step.process.assert_not_called()


@pytest.mark.asyncio
async def test_callback_fired_per_step(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """``on_step_complete`` receives one short step name per executed step, in order."""
    listing = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([listing])

    processor = ListingProcessor(listing_repository)
    for step in processor.process_steps:
        step.needs_processing = AsyncMock(return_value=True)
        step.process = AsyncMock(return_value=listing)

    callback_args: list[str] = []

    def record_step(name: str) -> None:
        callback_args.append(name)

    await processor.process_listing(42, on_step_complete=record_step)

    assert callback_args == ["details", "images", "ocr"]


@pytest.mark.asyncio
async def test_step_skipped_when_not_needed(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """A step whose ``needs_processing`` returns False never has ``process`` called."""
    listing = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([listing])

    processor = ListingProcessor(listing_repository)
    for step in processor.process_steps:
        step.needs_processing = AsyncMock(return_value=False)
        step.process = AsyncMock()

    await processor.process_listing(42)

    for step in processor.process_steps:
        step.process.assert_not_called()


# ---------- Individual step tests ----------


@pytest.mark.asyncio
async def test_fetch_details_creates_listing(
    listing_repository: ListingRepository,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """FetchListingDetailsStep turns a detail payload into a persisted listing."""
    sample_detail = {
        "property": {
            "price": 2000,
            "bedrooms": 2,
            "branch": {"brandName": "Test Agency"},
            "councilTaxInfo": {"content": [{"value": "C"}]},
            "longitude": -0.1,
            "latitude": 51.5,
            "photos": [{"thumbnailUrl": "https://example.com/photo.jpg"}],
            "floorplans": [],
            "letFurnishType": "furnished",
            "letDateAvailable": "Now",
            "visible": True,
        }
    }
    # Replace the detail lookup with a canned payload for this test.
    monkeypatch.setattr("listing_processor.detail_query", AsyncMock(return_value=sample_detail))

    step = FetchListingDetailsStep(listing_repository, ListingType.RENT)
    result = await step.process(999)

    assert result is not None
    assert result.id == 999
    assert result.price == 2000

    # Verify it was persisted
    stored = await listing_repository.get_listings(only_ids=[999])
    assert len(stored) == 1


@pytest.mark.asyncio
async def test_processor_marks_seen(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """Processing a listing advances its ``last_seen`` even when every step is skipped."""
    old_time = datetime(2020, 1, 1)
    listing = rent_listing_factory(id=50, last_seen=old_time)
    await listing_repository.upsert_listings([listing])

    processor = ListingProcessor(listing_repository)
    # Skip all steps so we only test mark_seen
    for step in processor.process_steps:
        step.needs_processing = AsyncMock(return_value=False)
        step.process = AsyncMock()

    await processor.process_listing(50)

    updated = await listing_repository.get_listings(only_ids=[50])
    assert len(updated) == 1
    # last_seen should have been updated to roughly now
    assert updated[0].last_seen > old_time