Refactor codebase following Clean Code principles and add 229 tests
- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
- Replace nonlocal mutations with a _PipelineState dataclass in listing_tasks
- Fix bugs: isinstance → equality check in repository; verify_exp for OIDC tokens
- Consolidate duplicate filter methods in listing_repository
- Move hardcoded config to env vars with backward-compatible defaults
- Simplify the CLI decorator to auto-build QueryParameters
- Add a deprecation docstring to data_access.py
- Test count: 158 → 387 (all passing)
This commit is contained in:
parent
7e05b3c971
commit
150342bb9e
48 changed files with 5029 additions and 990 deletions
215
crawler/tests/unit/test_image_fetcher.py
Normal file
215
crawler/tests/unit/test_image_fetcher.py
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
"""Unit tests for the image fetcher service."""
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from datetime import datetime
|
||||
|
||||
import aiohttp
|
||||
import pytest
|
||||
from tenacity import stop_after_attempt
|
||||
|
||||
from models.listing import RentListing, ListingSite, FurnishType
|
||||
from services.image_fetcher import dump_images_for_listing, MAX_CONCURRENT_DOWNLOADS
|
||||
|
||||
|
||||
def _make_listing(**kwargs) -> RentListing:  # type: ignore[no-untyped-def]
    """Build a ``RentListing`` for tests; keyword arguments override the defaults.

    The default listing has id 12345 and carries exactly one floorplan URL
    under ``additional_info["property"]["floorplans"]``.
    """
    default_floorplans = [
        {"url": "https://media.rightmove.co.uk/imgs/floorplan_1.jpg"}
    ]
    base_fields = {
        "id": 12345,
        "price": 2000.0,
        "number_of_bedrooms": 2,
        "square_meters": None,
        "agency": "Test Agency",
        "council_tax_band": "C",
        "longitude": 0.0,
        "latitude": 0.0,
        "price_history_json": "[]",
        "listing_site": ListingSite.RIGHTMOVE,
        "last_seen": datetime.now(),
        "photo_thumbnail": None,
        "floorplan_image_paths": [],
        "additional_info": {
            "property": {
                "visible": True,
                "floorplans": default_floorplans,
            }
        },
        "routing_info_json": None,
        "furnish_type": FurnishType.FURNISHED,
        "available_from": None,
    }
    # Later entries win, so caller-supplied values replace the defaults.
    return RentListing(**{**base_fields, **kwargs})
|
||||
|
||||
|
||||
class TestDumpImagesForListing:
    """Exercise ``dump_images_for_listing`` against mocked aiohttp sessions."""

    @staticmethod
    def _session_yielding(response: AsyncMock) -> MagicMock:
        """Return a ``ClientSession`` mock whose ``get()`` yields *response*.

        ``get()`` hands back an async context manager, so ``__aenter__`` and
        ``__aexit__`` are both stubbed explicitly.
        """
        request_cm = AsyncMock()
        request_cm.__aenter__ = AsyncMock(return_value=response)
        request_cm.__aexit__ = AsyncMock(return_value=False)

        session = MagicMock(spec=aiohttp.ClientSession)
        session.get = MagicMock(return_value=request_cm)
        return session

    async def test_downloads_floorplan_image(self, tmp_path: Path) -> None:
        """A 200 response is written to disk and recorded on the listing."""
        listing = _make_listing()
        payload = b"\x89PNG fake image data"
        response = AsyncMock(status=200, read=AsyncMock(return_value=payload))
        session = self._session_yielding(response)

        fetched = await dump_images_for_listing(listing, tmp_path, session=session)

        assert fetched is not None
        assert fetched.id == 12345
        assert len(fetched.floorplan_image_paths) == 1
        # The bytes served by the mock must be exactly what landed on disk.
        saved_file = Path(fetched.floorplan_image_paths[0])
        assert saved_file.exists()
        assert saved_file.read_bytes() == payload

    async def test_skips_existing_images(self, tmp_path: Path) -> None:
        """A floorplan that is already on disk triggers no HTTP request."""
        listing = _make_listing()
        # Seed the floorplan file before the fetcher runs.
        cached_file = tmp_path / str(listing.id) / "floorplans" / "floorplan_1.jpg"
        cached_file.parent.mkdir(parents=True)
        cached_file.write_bytes(b"existing image")
        session = MagicMock(spec=aiohttp.ClientSession)

        outcome = await dump_images_for_listing(listing, tmp_path, session=session)

        # The only floorplan was skipped, so None is returned ...
        assert outcome is None
        # ... and no request was issued.
        session.get.assert_not_called()

    async def test_returns_none_on_404(self, tmp_path: Path) -> None:
        """A 404 for the floorplan makes the call return None."""
        listing = _make_listing()
        session = self._session_yielding(AsyncMock(status=404))

        outcome = await dump_images_for_listing(listing, tmp_path, session=session)

        assert outcome is None

    async def test_raises_on_non_200_status(self, tmp_path: Path) -> None:
        """A status other than 200 or 404 surfaces as an exception."""
        listing = _make_listing()
        session = self._session_yielding(AsyncMock(status=500))

        with pytest.raises(Exception, match="HTTP 500"):
            # Cap tenacity at a single attempt and re-raise the original
            # error so the test does not go through the retry policy.
            single_attempt = dump_images_for_listing.retry_with(
                stop=stop_after_attempt(1),
                reraise=True,
            )
            await single_attempt(listing, tmp_path, session=session)

    async def test_returns_none_when_no_floorplans(self, tmp_path: Path) -> None:
        """An empty floorplan list makes the call return None."""
        no_floorplans = {"property": {"visible": True, "floorplans": []}}
        listing = _make_listing(additional_info=no_floorplans)
        session = MagicMock(spec=aiohttp.ClientSession)

        outcome = await dump_images_for_listing(listing, tmp_path, session=session)

        assert outcome is None

    async def test_url_filename_extraction(self, tmp_path: Path) -> None:
        """The saved file is named after the last path segment of the URL."""
        floorplan_entry = {
            "url": "https://media.rightmove.co.uk/dir/sub/my_floorplan.png"
        }
        listing = _make_listing(
            additional_info={
                "property": {
                    "visible": True,
                    "floorplans": [floorplan_entry],
                }
            }
        )
        response = AsyncMock(status=200, read=AsyncMock(return_value=b"fake png"))
        session = self._session_yielding(response)

        fetched = await dump_images_for_listing(listing, tmp_path, session=session)

        assert fetched is not None
        saved_file = Path(fetched.floorplan_image_paths[0])
        assert saved_file.name == "my_floorplan.png"

    async def test_creates_session_when_none_provided(self, tmp_path: Path) -> None:
        """With ``session=None`` a session is created and then closed."""
        listing = _make_listing()
        response = AsyncMock(status=200, read=AsyncMock(return_value=b"fake image"))
        owned_session = self._session_yielding(response)
        owned_session.close = AsyncMock()

        # Make the ClientSession constructor return the mock built above.
        with patch(
            "services.image_fetcher.aiohttp.ClientSession",
            return_value=owned_session,
        ):
            fetched = await dump_images_for_listing(listing, tmp_path, session=None)

        assert fetched is not None
        owned_session.close.assert_awaited_once()
|
||||
|
||||
|
||||
class TestImageFetcherConfig:
    """Sanity checks on the image fetcher's module-level settings."""

    def test_max_concurrent_downloads_constant(self) -> None:
        """MAX_CONCURRENT_DOWNLOADS is positive and no greater than 20."""
        assert 0 < MAX_CONCURRENT_DOWNLOADS <= 20
|
||||
Loading…
Add table
Add a link
Reference in a new issue