wrongmove/crawler/tests/unit/test_image_fetcher.py
Viktor Barzin 150342bb9e
Refactor codebase following Clean Code principles and add 229 tests
- Extract helpers to reduce function sizes (listing_tasks, app.py, query.py, listing_fetcher)
  - Replace nonlocal mutations with _PipelineState dataclass in listing_tasks
  - Fix bugs: isinstance→equality check in repository, verify_exp for OIDC tokens
  - Consolidate duplicate filter methods in listing_repository
  - Move hardcoded config to env vars with backward-compatible defaults
  - Simplify CLI decorator to auto-build QueryParameters
  - Add deprecation docstring to data_access.py
  - Test count: 158 → 387 (all passing)
2026-02-07 20:19:57 +00:00

215 lines
7.7 KiB
Python

"""Unit tests for the image fetcher service."""
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime
import aiohttp
import pytest
from tenacity import stop_after_attempt
from models.listing import RentListing, ListingSite, FurnishType
from services.image_fetcher import dump_images_for_listing, MAX_CONCURRENT_DOWNLOADS
def _make_listing(**kwargs) -> RentListing:  # type: ignore[no-untyped-def]
    """Build a RentListing for tests.

    Every field gets a fixed default (``last_seen`` is the current time);
    any keyword argument replaces the default of the same name. The default
    ``additional_info`` describes a visible property with a single
    floorplan URL.
    """
    fields = {
        "id": 12345,
        "price": 2000.0,
        "number_of_bedrooms": 2,
        "square_meters": None,
        "agency": "Test Agency",
        "council_tax_band": "C",
        "longitude": 0.0,
        "latitude": 0.0,
        "price_history_json": "[]",
        "listing_site": ListingSite.RIGHTMOVE,
        "last_seen": datetime.now(),
        "photo_thumbnail": None,
        "floorplan_image_paths": [],
        "additional_info": {
            "property": {
                "visible": True,
                "floorplans": [
                    {"url": "https://media.rightmove.co.uk/imgs/floorplan_1.jpg"}
                ],
            }
        },
        "routing_info_json": None,
        "furnish_type": FurnishType.FURNISHED,
        "available_from": None,
    }
    # Caller-supplied values take precedence over the defaults above.
    fields.update(kwargs)
    return RentListing(**fields)
class TestDumpImagesForListing:
    """Tests for dump_images_for_listing function."""

    @staticmethod
    def _session_yielding(response: AsyncMock) -> MagicMock:
        """Return a mock ClientSession whose ``get()`` yields ``response``.

        ``get`` is a plain MagicMock that returns an async context manager,
        so the result of ``session.get(...)`` can be used with ``async with``
        and binds to ``response``.
        """
        session = MagicMock(spec=aiohttp.ClientSession)
        request_cm = AsyncMock()
        request_cm.__aenter__ = AsyncMock(return_value=response)
        # A falsy __aexit__ result lets exceptions raised inside the
        # ``async with`` body propagate to the caller.
        request_cm.__aexit__ = AsyncMock(return_value=False)
        session.get = MagicMock(return_value=request_cm)
        return session

    async def test_downloads_floorplan_image(self, tmp_path: Path) -> None:
        """Test successful floorplan image download."""
        listing = _make_listing()
        image_bytes = b"\x89PNG fake image data"
        response = AsyncMock(status=200, read=AsyncMock(return_value=image_bytes))
        session = self._session_yielding(response)

        result = await dump_images_for_listing(listing, tmp_path, session=session)

        assert result is not None
        assert result.id == 12345
        assert len(result.floorplan_image_paths) == 1
        # Verify the image was written
        written_path = Path(result.floorplan_image_paths[0])
        assert written_path.exists()
        assert written_path.read_bytes() == image_bytes

    async def test_skips_existing_images(self, tmp_path: Path) -> None:
        """Test that existing images are not re-downloaded."""
        listing = _make_listing()
        # Pre-create the floorplan file
        floorplan_dir = tmp_path / str(listing.id) / "floorplans"
        floorplan_dir.mkdir(parents=True)
        (floorplan_dir / "floorplan_1.jpg").write_bytes(b"existing image")
        session = MagicMock(spec=aiohttp.ClientSession)

        result = await dump_images_for_listing(listing, tmp_path, session=session)

        # Should return None because the only floorplan was skipped (continue)
        assert result is None
        # Session.get should NOT have been called
        session.get.assert_not_called()

    async def test_returns_none_on_404(self, tmp_path: Path) -> None:
        """Test that 404 responses return None (image not found)."""
        listing = _make_listing()
        session = self._session_yielding(AsyncMock(status=404))

        result = await dump_images_for_listing(listing, tmp_path, session=session)

        assert result is None

    async def test_raises_on_non_200_status(self, tmp_path: Path) -> None:
        """Test that non-200/404 status raises exception."""
        listing = _make_listing()
        session = self._session_yielding(AsyncMock(status=500))

        with pytest.raises(Exception, match="HTTP 500"):
            # Disable tenacity retry for testing: stop after 1 attempt and reraise
            await dump_images_for_listing.retry_with(
                stop=stop_after_attempt(1),
                reraise=True,
            )(listing, tmp_path, session=session)

    async def test_returns_none_when_no_floorplans(self, tmp_path: Path) -> None:
        """Test listing with no floorplans returns None."""
        listing = _make_listing(
            additional_info={"property": {"visible": True, "floorplans": []}}
        )
        session = MagicMock(spec=aiohttp.ClientSession)

        result = await dump_images_for_listing(listing, tmp_path, session=session)

        assert result is None

    async def test_url_filename_extraction(self, tmp_path: Path) -> None:
        """Test that filenames are correctly extracted from URLs."""
        listing = _make_listing(
            additional_info={
                "property": {
                    "visible": True,
                    "floorplans": [
                        {
                            "url": "https://media.rightmove.co.uk/dir/sub/my_floorplan.png"
                        }
                    ],
                }
            }
        )
        response = AsyncMock(status=200, read=AsyncMock(return_value=b"fake png"))
        session = self._session_yielding(response)

        result = await dump_images_for_listing(listing, tmp_path, session=session)

        assert result is not None
        written_path = Path(result.floorplan_image_paths[0])
        assert written_path.name == "my_floorplan.png"

    async def test_creates_session_when_none_provided(self, tmp_path: Path) -> None:
        """Test that a session is created and closed when none is provided."""
        listing = _make_listing()
        response = AsyncMock(status=200, read=AsyncMock(return_value=b"fake image"))
        session_instance = self._session_yielding(response)
        # close() must be awaitable so the assertion below can check the await.
        session_instance.close = AsyncMock()

        # Make the ClientSession constructor hand back the prepared mock.
        with patch(
            "services.image_fetcher.aiohttp.ClientSession",
            return_value=session_instance,
        ):
            result = await dump_images_for_listing(listing, tmp_path, session=None)

        assert result is not None
        session_instance.close.assert_awaited_once()
class TestImageFetcherConfig:
    """Tests for image fetcher configuration."""

    def test_max_concurrent_downloads_constant(self) -> None:
        """Check that MAX_CONCURRENT_DOWNLOADS is positive and at most 20."""
        assert 0 < MAX_CONCURRENT_DOWNLOADS <= 20