187 lines
5.8 KiB
Python
187 lines
5.8 KiB
Python
|
|
"""Integration tests for ListingProcessor and processing steps."""
|
||
|
|
from unittest.mock import AsyncMock
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
from sqlalchemy import Engine
|
||
|
|
|
||
|
|
from listing_processor import (
|
||
|
|
DetectFloorplanStep,
|
||
|
|
FetchImagesStep,
|
||
|
|
FetchListingDetailsStep,
|
||
|
|
ListingProcessor,
|
||
|
|
)
|
||
|
|
from models.listing import ListingType
|
||
|
|
from repositories.listing_repository import ListingRepository
|
||
|
|
|
||
|
|
|
||
|
|
# ---------- Processor structure tests ----------
|
||
|
|
|
||
|
|
|
||
|
|
def test_processor_has_three_steps(listing_repository: ListingRepository) -> None:
    """A freshly constructed processor exposes exactly three processing steps."""
    steps = ListingProcessor(listing_repository).process_steps
    assert len(steps) == 3
|
||
|
|
|
||
|
|
|
||
|
|
def test_step_order(listing_repository: ListingRepository) -> None:
    """The steps are registered as details, then images, then floorplan detection."""
    expected_order = [FetchListingDetailsStep, FetchImagesStep, DetectFloorplanStep]

    pipeline = ListingProcessor(listing_repository)
    actual_order = list(map(type, pipeline.process_steps))

    assert actual_order == expected_order
|
||
|
|
|
||
|
|
|
||
|
|
# ---------- Processing flow ----------
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_process_calls_steps_in_order(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """Processing a listing invokes every step once, in pipeline order.

    Each step's ``needs_processing`` and ``process`` are replaced with
    ``AsyncMock`` objects; ``process`` records the step's class name so the
    invocation order can be compared against the expected sequence.
    """
    # The listing has to exist beforehand so mark_seen doesn't fail.
    seeded = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([seeded])

    pipeline = ListingProcessor(listing_repository)
    invoked: list[str] = []

    for stage in pipeline.process_steps:
        stage_name = type(stage).__name__

        # The name is bound as a default argument so each recorder keeps its
        # own step name instead of the loop variable's final value.
        def record(lid, n=stage_name):
            invoked.append(n)
            return seeded

        stage.needs_processing = AsyncMock(return_value=True)
        stage.process = AsyncMock(side_effect=record)

    outcome = await pipeline.process_listing(42)

    assert outcome is not None
    assert invoked == [
        "FetchListingDetailsStep",
        "FetchImagesStep",
        "DetectFloorplanStep",
    ]
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_step_failure_stops_pipeline(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """A failure in the first step aborts the run.

    The first step's ``process`` raises ``RuntimeError``; the test expects
    ``process_listing`` to return ``None`` and the two later steps never to
    have their ``process`` called.
    """
    seeded = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([seeded])

    pipeline = ListingProcessor(listing_repository)
    first_step = pipeline.process_steps[0]
    second_step = pipeline.process_steps[1]
    third_step = pipeline.process_steps[2]

    # Only the first step blows up; the later ones are plain mocks so that any
    # call to them can be detected afterwards.
    first_step.needs_processing = AsyncMock(return_value=True)
    first_step.process = AsyncMock(side_effect=RuntimeError("boom"))
    for later_step in (second_step, third_step):
        later_step.needs_processing = AsyncMock(return_value=True)
        later_step.process = AsyncMock()

    outcome = await pipeline.process_listing(42)

    assert outcome is None
    # Second and third steps should not have been called
    for later_step in (second_step, third_step):
        later_step.process.assert_not_called()
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_callback_fired_per_step(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """``on_step_complete`` is invoked with "details", "images", "ocr" in turn."""
    seeded = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([seeded])

    pipeline = ListingProcessor(listing_repository)
    for stage in pipeline.process_steps:
        stage.needs_processing = AsyncMock(return_value=True)
        stage.process = AsyncMock(return_value=seeded)

    reported: list[str] = []

    def remember(name):
        # Collect whatever step name the processor passes to the callback.
        reported.append(name)

    await pipeline.process_listing(42, on_step_complete=remember)

    assert reported == ["details", "images", "ocr"]
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_step_skipped_when_not_needed(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """No step's ``process`` runs when ``needs_processing`` reports ``False``."""
    seeded = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([seeded])

    pipeline = ListingProcessor(listing_repository)
    for stage in pipeline.process_steps:
        # Every step declares itself unnecessary.
        stage.needs_processing = AsyncMock(return_value=False)
        stage.process = AsyncMock()

    await pipeline.process_listing(42)

    for stage in pipeline.process_steps:
        stage.process.assert_not_called()
|
||
|
|
|
||
|
|
|
||
|
|
# ---------- Individual step tests ----------
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_fetch_details_creates_listing(
    listing_repository: ListingRepository,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``FetchListingDetailsStep.process`` builds and stores a listing.

    ``listing_processor.detail_query`` is patched to return a canned payload,
    so no real detail lookup happens. The test checks the returned listing's
    id and price, then confirms that the repository returns one listing for
    that id.
    """
    property_payload = {
        "price": 2000,
        "bedrooms": 2,
        "branch": {"brandName": "Test Agency"},
        "councilTaxInfo": {"content": [{"value": "C"}]},
        "longitude": -0.1,
        "latitude": 51.5,
        "photos": [{"thumbnailUrl": "https://example.com/photo.jpg"}],
        "floorplans": [],
        "letFurnishType": "furnished",
        "letDateAvailable": "Now",
        "visible": True,
    }
    canned_response = {"property": property_payload}
    fake_query = AsyncMock(return_value=canned_response)
    monkeypatch.setattr("listing_processor.detail_query", fake_query)

    details_step = FetchListingDetailsStep(listing_repository, ListingType.RENT)
    created = await details_step.process(999)

    assert created is not None
    assert created.id == 999
    assert created.price == 2000

    # Verify it was persisted
    persisted = await listing_repository.get_listings(only_ids=[999])
    assert len(persisted) == 1
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_processor_marks_seen(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """Processing a listing moves its ``last_seen`` timestamp forward.

    A listing is stored with a ``last_seen`` of 2020-01-01 and every step is
    mocked to report that no processing is needed. After ``process_listing``
    runs, the stored ``last_seen`` must be later than the seeded value.
    """
    # Only ``datetime`` is needed; the previously imported ``timedelta`` was unused.
    from datetime import datetime

    old_time = datetime(2020, 1, 1)
    listing = rent_listing_factory(id=50, last_seen=old_time)
    await listing_repository.upsert_listings([listing])

    processor = ListingProcessor(listing_repository)

    # Skip all steps so we only test mark_seen
    for step in processor.process_steps:
        step.needs_processing = AsyncMock(return_value=False)
        step.process = AsyncMock()

    await processor.process_listing(50)

    updated = await listing_repository.get_listings(only_ids=[50])
    assert len(updated) == 1
    # The check is deliberately loose: any timestamp after the seeded one
    # passes, so the test does not depend on the exact time of the run.
    assert updated[0].last_seen > old_time
|