- Extract rate limiter DRY: consolidate 3 duplicated check/respond paths into _check_counter and _enforce_limit helpers, add proper type annotations
- Replace bare Exception raises with FloorplanDownloadError and RightmoveApiError; narrow catch clauses to specific exception types; fix Step base class to inherit from ABC
- Consolidate MAX_OCR_WORKERS into config/scraper_config.py; extract _find_tenure_value helper to deduplicate tenure parsing
- Extract _build_poi_distances_lookup from stream endpoint to reduce nesting
- Fix csv_exporter: optional decisions.json, NaN instead of -1 sentinels, guard against division by zero on missing square meters
- Fix notifications.py broken list[Surface]() constructor, database.py stale comments and missing type annotation, auth.py type:ignore, ui_exporter.py stale TODO
- Fix 3 pre-existing test failures: mock cache layer in streaming tests, bypass rate limiter for test isolation, fix cache invalidation test to account for two-pattern scan loop
186 lines
5.8 KiB
Python
186 lines
5.8 KiB
Python
"""Integration tests for ListingProcessor and processing steps."""
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
from sqlalchemy import Engine
|
|
|
|
from listing_processor import (
|
|
DetectFloorplanStep,
|
|
FetchImagesStep,
|
|
FetchListingDetailsStep,
|
|
ListingProcessor,
|
|
)
|
|
from models.listing import ListingType
|
|
from repositories.listing_repository import ListingRepository
|
|
|
|
|
|
# ---------- Processor structure tests ----------
|
|
|
|
|
|
def test_processor_has_three_steps(listing_repository: ListingRepository) -> None:
    """Check that a newly constructed processor exposes exactly three steps."""
    pipeline = ListingProcessor(listing_repository).process_steps
    step_count = len(pipeline)
    assert step_count == 3
|
|
|
|
|
|
def test_step_order(listing_repository: ListingRepository) -> None:
    """Check the concrete class of each step, in pipeline order."""
    expected_classes = [FetchListingDetailsStep, FetchImagesStep, DetectFloorplanStep]
    pipeline = ListingProcessor(listing_repository).process_steps
    actual_classes = list(map(type, pipeline))
    assert actual_classes == expected_classes
|
|
|
|
|
|
# ---------- Processing flow ----------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_process_calls_steps_in_order(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """Check that process_listing invokes every step's process() in pipeline order.

    Each step's ``needs_processing`` and ``process`` are replaced with
    ``AsyncMock`` objects; the ``process`` mock records the step's class name
    so the call sequence can be compared afterwards.
    """
    # The listing has to exist beforehand so mark_seen doesn't fail.
    seeded = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([seeded])

    processor = ListingProcessor(listing_repository)
    invoked: list[str] = []

    for step in processor.process_steps:
        step_name = type(step).__name__

        # The name is bound as a default argument so each recorder keeps the
        # name of its own step rather than the loop's final value.
        def record(lid, n=step_name):
            invoked.append(n)
            return seeded

        step.needs_processing = AsyncMock(return_value=True)
        step.process = AsyncMock(side_effect=record)

    outcome = await processor.process_listing(42)

    assert outcome is not None
    assert invoked == [
        "FetchListingDetailsStep",
        "FetchImagesStep",
        "DetectFloorplanStep",
    ]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_step_failure_stops_pipeline(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """Check that a ValueError in the first step yields None and halts later steps."""
    seeded = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([seeded])

    processor = ListingProcessor(listing_repository)
    steps = processor.process_steps
    failing_step, second_step, third_step = steps[0], steps[1], steps[2]

    # First step reports it has work to do, then blows up.
    failing_step.needs_processing = AsyncMock(return_value=True)
    failing_step.process = AsyncMock(side_effect=ValueError("boom"))

    # The remaining steps would run if reached; their mocks let us prove they were not.
    for later_step in (second_step, third_step):
        later_step.needs_processing = AsyncMock(return_value=True)
        later_step.process = AsyncMock()

    outcome = await processor.process_listing(42)

    assert outcome is None
    # Second and third steps should not have been called
    for later_step in (second_step, third_step):
        later_step.process.assert_not_called()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_callback_fired_per_step(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """Check that on_step_complete receives one short step name per executed step."""
    seeded = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([seeded])

    processor = ListingProcessor(listing_repository)
    for step in processor.process_steps:
        step.needs_processing = AsyncMock(return_value=True)
        step.process = AsyncMock(return_value=seeded)

    reported: list[str] = []

    def record_step(name):
        # Collect whatever name the processor passes to the callback.
        reported.append(name)

    await processor.process_listing(42, on_step_complete=record_step)

    assert reported == ["details", "images", "ocr"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_step_skipped_when_not_needed(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """Check that process() is never called on a step whose needs_processing is False."""
    seeded = rent_listing_factory(id=42)
    await listing_repository.upsert_listings([seeded])

    processor = ListingProcessor(listing_repository)

    # Every step declines the work.
    for step in processor.process_steps:
        step.needs_processing = AsyncMock(return_value=False)
        step.process = AsyncMock()

    await processor.process_listing(42)

    for step in processor.process_steps:
        step.process.assert_not_called()
|
|
|
|
|
|
# ---------- Individual step tests ----------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_fetch_details_creates_listing(
    listing_repository: ListingRepository,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that FetchListingDetailsStep builds and stores a listing from a detail payload.

    ``listing_processor.detail_query`` is patched with an ``AsyncMock`` that
    returns a canned payload, so the real query function is not invoked.
    """
    property_payload = {
        "price": 2000,
        "bedrooms": 2,
        "branch": {"brandName": "Test Agency"},
        "councilTaxInfo": {"content": [{"value": "C"}]},
        "longitude": -0.1,
        "latitude": 51.5,
        "photos": [{"thumbnailUrl": "https://example.com/photo.jpg"}],
        "floorplans": [],
        "letFurnishType": "furnished",
        "letDateAvailable": "Now",
        "visible": True,
    }
    detail_stub = AsyncMock(return_value={"property": property_payload})
    monkeypatch.setattr("listing_processor.detail_query", detail_stub)

    fetch_step = FetchListingDetailsStep(listing_repository, ListingType.RENT)
    created = await fetch_step.process(999)

    assert created is not None
    assert created.id == 999
    assert created.price == 2000

    # Verify it was persisted
    persisted = await listing_repository.get_listings(only_ids=[999])
    assert len(persisted) == 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_processor_marks_seen(
    listing_repository: ListingRepository,
    rent_listing_factory,
) -> None:
    """Check that process_listing refreshes ``last_seen`` even when no step runs.

    A listing is stored with a ``last_seen`` far in the past, all steps are
    mocked to decline processing, and the stored listing is re-read to confirm
    its ``last_seen`` moved forward.
    """
    # Only datetime is needed here; the previously imported timedelta was unused.
    from datetime import datetime

    old_time = datetime(2020, 1, 1)
    listing = rent_listing_factory(id=50, last_seen=old_time)
    await listing_repository.upsert_listings([listing])

    processor = ListingProcessor(listing_repository)

    # Skip all steps so we only test mark_seen
    for step in processor.process_steps:
        step.needs_processing = AsyncMock(return_value=False)
        step.process = AsyncMock()

    await processor.process_listing(50)

    updated = await listing_repository.get_listings(only_ids=[50])
    assert len(updated) == 1
    # last_seen should have been updated to roughly now
    assert updated[0].last_seen > old_time
|