Flatten repo structure: move crawler/ to root, remove vqa/ and immoweb/
The crawler subdirectory was the only active project. Moving it to the repo root simplifies paths and removes the unnecessary nesting. The vqa/ and immoweb/ directories were legacy/unused and have been removed. Updated .drone.yml, .gitignore, .claude/ docs, and skills to reflect the new flat structure.
This commit is contained in:
parent
e2247be700
commit
eafbc1ac52
221 changed files with 70 additions and 146140 deletions
385
tests/unit/test_query.py
Normal file
385
tests/unit/test_query.py
Normal file
|
|
@ -0,0 +1,385 @@
|
|||
"""Unit tests for rec/query.py."""
|
||||
from collections.abc import Iterator
from unittest.mock import AsyncMock, MagicMock, patch

import aiohttp
import pytest

from config.scraper_config import ScraperConfig
from models.listing import ListingType, FurnishType
from rec.exceptions import CircuitBreakerOpenError
from rec.query import (
    detail_query,
    listing_query,
    probe_query,
    PropertyType,
    create_session,
    _build_base_params,
    _build_listing_params,
    _build_probe_params,
    ANDROID_APP_VERSION,
    ANDROID_APP_VERSION_LISTING,
    RIGHTMOVE_API_BASE,
    PROPERTY_LISTING_ENDPOINT,
    DEFAULT_HEADERS,
    LISTING_HEADERS,
    check_circuit_breaker,
    reset_circuit_breaker,
    get_circuit_breaker,
)
from rec.throttle_detector import reset_throttle_metrics
|
||||
|
||||
|
||||
@pytest.fixture
def config() -> ScraperConfig:
    """Provide a ScraperConfig with the circuit breaker enabled.

    The breaker is configured with a failure threshold of 3 and a recovery
    timeout of 0.5 so tests can trip it with only a few recorded failures.
    """
    settings = {
        "max_concurrent_requests": 5,
        "request_delay_ms": 10,
        "slow_response_threshold": 10.0,
        "enable_circuit_breaker": True,
        "circuit_breaker_failure_threshold": 3,
        "circuit_breaker_recovery_timeout": 0.5,
    }
    return ScraperConfig(**settings)
|
||||
|
||||
|
||||
@pytest.fixture
def config_no_cb() -> ScraperConfig:
    """Provide a ScraperConfig with the circuit breaker disabled.

    All other settings are left at whatever ScraperConfig defaults to.
    """
    breaker_disabled = ScraperConfig(enable_circuit_breaker=False)
    return breaker_disabled
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_globals() -> Iterator[None]:
    """Reset throttle metrics and the circuit breaker around every test.

    Runs automatically (``autouse=True``). State is cleared before the test
    so each test starts clean, and again afterwards so that whatever a test
    left behind (for example a breaker it tripped on purpose) is not carried
    into tests that run later in the same session.
    """
    reset_throttle_metrics()
    reset_circuit_breaker()
    yield
    # Teardown: undo anything the test did to the shared state.
    reset_throttle_metrics()
    reset_circuit_breaker()
|
||||
|
||||
|
||||
class MockResponse:
|
||||
def __init__(
|
||||
self,
|
||||
status: int = 200,
|
||||
json_data: dict | None = None,
|
||||
text: str = "",
|
||||
):
|
||||
self.status = status
|
||||
self._json_data = json_data or {}
|
||||
self._text = text
|
||||
|
||||
async def json(self) -> dict:
|
||||
return self._json_data
|
||||
|
||||
async def text(self) -> str:
|
||||
return self._text
|
||||
|
||||
async def __aenter__(self) -> "MockResponse":
|
||||
return self
|
||||
|
||||
async def __aexit__(self, *args: object) -> None:
|
||||
pass
|
||||
|
||||
|
||||
def make_mock_session(response: MockResponse) -> MagicMock:
    """Build a mock session whose ``get`` always returns ``response``.

    The response is returned directly (not awaited), so it is expected to
    act as the async context manager that ``async with session.get(...)``
    enters.
    """
    get_stub = MagicMock(return_value=response)
    session = MagicMock()
    session.get = get_stub
    return session
|
||||
|
||||
|
||||
def make_mock_session_fn(get_fn: object) -> MagicMock:
    """Build a mock session whose ``get`` delegates to ``get_fn``.

    ``get_fn`` is installed as the mock's ``side_effect``, so it receives the
    arguments of every ``session.get(...)`` call and decides what is returned.
    """
    get_stub = MagicMock(side_effect=get_fn)
    session = MagicMock()
    session.get = get_stub
    return session
|
||||
|
||||
|
||||
class TestBuildBaseParams:
    """Checks for the query-parameter builders (base, listing and probe).

    The district lookup is patched in every test so that a known district
    name maps to a known location identifier.
    """

    def test_constructs_correct_params(self) -> None:
        """Base params carry every argument under its API key, as a string."""
        district_map = {"TestDistrict": "REGION^123"}
        with patch("rec.query.districts.get_districts", return_value=district_map):
            params = _build_base_params(
                channel=ListingType.RENT,
                page=2,
                page_size=25,
                radius=1.5,
                min_price=1000,
                max_price=3000,
                min_bedrooms=1,
                max_bedrooms=3,
                district="TestDistrict",
            )

        expected = {
            "locationIdentifier": "REGION^123",
            "channel": "RENT",
            "page": "2",
            "numberOfPropertiesPerPage": "25",
            "radius": "1.5",
            "sortBy": "distance",
            "includeUnavailableProperties": "false",
            "minPrice": "1000",
            "maxPrice": "3000",
            "minBedrooms": "1",
            "maxBedrooms": "3",
            "apiApplication": "ANDROID",
            "appVersion": ANDROID_APP_VERSION_LISTING,
        }
        for key, value in expected.items():
            assert params[key] == value

    def test_buy_channel_includes_dont_show_and_max_days(self) -> None:
        """BUY listing params include ``dontShow`` and ``maxDaysSinceAdded``."""
        listing_args = dict(
            page=1,
            channel=ListingType.BUY,
            min_bedrooms=1,
            max_bedrooms=2,
            radius=1.0,
            min_price=100000,
            max_price=500000,
            district="D",
            mustNewHome=False,
            max_days_since_added=7,
            property_type=[],
            page_size=25,
            furnish_types=[],
        )
        with patch("rec.query.districts.get_districts", return_value={"D": "LOC1"}):
            params = _build_listing_params(**listing_args)

        assert params["dontShow"] == "sharedOwnership,retirement"
        assert params["maxDaysSinceAdded"] == "7"

    def test_rent_channel_includes_furnish_types(self) -> None:
        """RENT listing params carry furnish types and omit the BUY-only keys."""
        listing_args = dict(
            page=1,
            channel=ListingType.RENT,
            min_bedrooms=1,
            max_bedrooms=2,
            radius=1.0,
            min_price=1000,
            max_price=3000,
            district="D",
            mustNewHome=False,
            max_days_since_added=30,
            property_type=[],
            page_size=25,
            furnish_types=[FurnishType.FURNISHED, FurnishType.UNFURNISHED],
        )
        with patch("rec.query.districts.get_districts", return_value={"D": "LOC1"}):
            params = _build_listing_params(**listing_args)

        assert params["furnishTypes"] == "furnished,unfurnished"
        for buy_only_key in ("dontShow", "maxDaysSinceAdded"):
            assert buy_only_key not in params

    def test_buy_channel_probe_includes_dont_show(self) -> None:
        """BUY probe params include the BUY-only keys and a page size of 1."""
        probe_args = dict(
            channel=ListingType.BUY,
            min_bedrooms=1,
            max_bedrooms=2,
            radius=1.0,
            min_price=100000,
            max_price=500000,
            district="D",
            max_days_since_added=7,
            furnish_types=[],
        )
        with patch("rec.query.districts.get_districts", return_value={"D": "LOC1"}):
            params = _build_probe_params(**probe_args)

        expected = {
            "dontShow": "sharedOwnership,retirement",
            "maxDaysSinceAdded": "7",
            "numberOfPropertiesPerPage": "1",
        }
        for key, value in expected.items():
            assert params[key] == value

    def test_probe_buy_skips_max_days_if_not_valid(self) -> None:
        """A BUY probe drops ``maxDaysSinceAdded`` when the value is not accepted."""
        probe_args = dict(
            channel=ListingType.BUY,
            min_bedrooms=1,
            max_bedrooms=2,
            radius=1.0,
            min_price=100000,
            max_price=500000,
            district="D",
            max_days_since_added=30,
            furnish_types=[],
        )
        with patch("rec.query.districts.get_districts", return_value={"D": "LOC1"}):
            params = _build_probe_params(**probe_args)

        # 30 is not one of the accepted values [1, 3, 7, 14], so the probe
        # leaves maxDaysSinceAdded out entirely.
        assert "maxDaysSinceAdded" not in params
|
||||
|
||||
|
||||
class TestMutableDefaultArgFix:
    """Smoke checks that omitted list arguments do not break repeated calls.

    Neither test makes an explicit assertion: each only verifies that two
    back-to-back calls relying on the default argument finish without raising.
    """

    @pytest.mark.asyncio
    async def test_property_type_default_not_shared(self, config: ScraperConfig) -> None:
        """Call listing_query twice, leaving property_type at its default."""
        session = make_mock_session(
            MockResponse(
                status=200,
                json_data={"totalAvailableResults": 0, "properties": []},
            )
        )
        query_args = dict(
            page=1,
            channel=ListingType.RENT,
            min_bedrooms=1,
            max_bedrooms=2,
            radius=1.0,
            min_price=1000,
            max_price=2000,
            district="D",
            session=session,
            config=config,
        )

        with patch("rec.query.districts.get_districts", return_value={"D": "LOC1"}):
            # Two identical calls, neither passing property_type explicitly.
            for _ in range(2):
                await listing_query(**query_args)
        # Reaching this point without an exception is the whole check; a
        # mutation of a shared default is only caught if it makes a call raise.

    @pytest.mark.asyncio
    async def test_furnish_types_default_not_shared(self, config: ScraperConfig) -> None:
        """Call probe_query twice, leaving furnish_types at its default."""
        session = make_mock_session(
            MockResponse(
                status=200,
                json_data={"totalAvailableResults": 0, "properties": []},
            )
        )
        query_args = dict(
            session=session,
            channel=ListingType.RENT,
            min_bedrooms=1,
            max_bedrooms=2,
            radius=1.0,
            min_price=1000,
            max_price=2000,
            district="D",
            config=config,
        )

        with patch("rec.query.districts.get_districts", return_value={"D": "LOC1"}):
            # Two identical calls, neither passing furnish_types explicitly.
            for _ in range(2):
                await probe_query(**query_args)
|
||||
|
||||
|
||||
class TestPropertyTypeEnum:
    """Checks on the string values exposed by PropertyType."""

    def test_enum_values(self) -> None:
        """Each member compares equal to its expected string value."""
        expected_pairs = (
            (PropertyType.BUNGALOW, "bungalow"),
            (PropertyType.DETACHED, "detached"),
            (PropertyType.FLAT, "flat"),
            (PropertyType.LAND, "land"),
            (PropertyType.PARK_HOME, "park-home"),
            (PropertyType.SEMI_DETACHED, "semi-detached"),
            (PropertyType.TERRACED, "terraced"),
        )
        for member, value in expected_pairs:
            assert member == value

    def test_enum_is_str(self) -> None:
        """Members are ``str`` instances and can be joined like plain strings."""
        flat = PropertyType.FLAT
        assert isinstance(flat, str)
        joined = ",".join([flat, PropertyType.DETACHED])
        assert joined == "flat,detached"
|
||||
|
||||
|
||||
class TestDetailQuery:
    """Behaviour of detail_query for successful and failing HTTP statuses."""

    @pytest.mark.asyncio
    async def test_success_200(self, config: ScraperConfig) -> None:
        """A 200 response yields the JSON body unchanged."""
        payload = {"id": 12345, "address": "123 Test St"}
        session = make_mock_session(MockResponse(status=200, json_data=payload))

        fetched = await detail_query(12345, session=session, config=config)

        assert fetched == payload

    @pytest.mark.asyncio
    async def test_raises_on_non_200(self, config: ScraperConfig) -> None:
        """A 404 response raises with a message matching 'Failed due to'."""
        session = make_mock_session(MockResponse(status=404, text="Not Found"))

        with pytest.raises(Exception, match="Failed due to"):
            await detail_query(99999, session=session, config=config)
|
||||
|
||||
|
||||
class TestCircuitBreakerBlocksRequests:
    """Every query entry point must refuse to run while the breaker is open.

    Each test first trips the breaker by recording as many failures as the
    configured threshold, confirms it is open, and then expects the query to
    raise CircuitBreakerOpenError.
    """

    @staticmethod
    def _trip_breaker(config: ScraperConfig) -> None:
        """Record enough failures to open the breaker and verify that it opened."""
        cb = get_circuit_breaker(config)
        assert cb is not None
        for _ in range(config.circuit_breaker_failure_threshold):
            cb.record_failure()
        # Check the precondition explicitly so a breaker that failed to open
        # is reported here rather than as a confusing failure in the query.
        assert cb.is_open

    @pytest.mark.asyncio
    async def test_circuit_breaker_blocks_when_open(self, config: ScraperConfig) -> None:
        """detail_query raises CircuitBreakerOpenError once the breaker is open."""
        self._trip_breaker(config)
        mock_session = MagicMock()

        with pytest.raises(CircuitBreakerOpenError):
            await detail_query(1, session=mock_session, config=config)

    @pytest.mark.asyncio
    async def test_circuit_breaker_blocks_listing_query(self, config: ScraperConfig) -> None:
        """listing_query raises CircuitBreakerOpenError once the breaker is open."""
        self._trip_breaker(config)
        mock_session = MagicMock()

        with patch("rec.query.districts.get_districts", return_value={"D": "LOC1"}):
            with pytest.raises(CircuitBreakerOpenError):
                await listing_query(
                    page=1,
                    channel=ListingType.RENT,
                    min_bedrooms=1,
                    max_bedrooms=2,
                    radius=1.0,
                    min_price=1000,
                    max_price=2000,
                    district="D",
                    session=mock_session,
                    config=config,
                )

    @pytest.mark.asyncio
    async def test_circuit_breaker_blocks_probe_query(self, config: ScraperConfig) -> None:
        """probe_query raises CircuitBreakerOpenError once the breaker is open."""
        self._trip_breaker(config)
        mock_session = MagicMock()

        with patch("rec.query.districts.get_districts", return_value={"D": "LOC1"}):
            with pytest.raises(CircuitBreakerOpenError):
                await probe_query(
                    session=mock_session,
                    channel=ListingType.RENT,
                    min_bedrooms=1,
                    max_bedrooms=2,
                    radius=1.0,
                    min_price=1000,
                    max_price=2000,
                    district="D",
                    config=config,
                )
|
||||
|
||||
|
||||
class TestConstants:
    """Pins the module-level constants exported by rec.query."""

    def test_android_app_version(self) -> None:
        """The default app version string is pinned."""
        pinned = "3.70.0"
        assert ANDROID_APP_VERSION == pinned

    def test_android_app_version_listing(self) -> None:
        """The listing-specific app version string is pinned."""
        pinned = "4.28.0"
        assert ANDROID_APP_VERSION_LISTING == pinned

    def test_rightmove_api_base(self) -> None:
        """The API base URL is pinned."""
        pinned = "https://api.rightmove.co.uk/api"
        assert RIGHTMOVE_API_BASE == pinned

    def test_property_listing_endpoint(self) -> None:
        """The property-listing endpoint URL is pinned."""
        pinned = "https://api.rightmove.co.uk/api/property-listing"
        assert PROPERTY_LISTING_ENDPOINT == pinned

    def test_listing_headers_extends_default(self) -> None:
        """Listing headers contain every default header plus Accept-Encoding."""
        assert all(
            LISTING_HEADERS[name] == value
            for name, value in DEFAULT_HEADERS.items()
        )
        assert LISTING_HEADERS["Accept-Encoding"] == "gzip, deflate, br"
|
||||
Loading…
Add table
Add a link
Reference in a new issue