Add services layer, tests, streaming UI, and clean up legacy code
This commit is contained in:
parent
5514fa6381
commit
d205d15c74
62 changed files with 3729 additions and 1024 deletions
293
crawler/tests/unit/test_schedule_config.py
Normal file
293
crawler/tests/unit/test_schedule_config.py
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
"""Unit tests for schedule configuration."""
|
||||
import os
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from config.schedule_config import ScheduleConfig, SchedulesConfig
|
||||
from models.listing import FurnishType, ListingType
|
||||
|
||||
|
||||
class TestScheduleConfig:
    """Construction and query-parameter conversion of ``ScheduleConfig``."""

    def test_basic_creation_with_defaults(self):
        """Only name and listing type are supplied; all other fields take defaults."""
        cfg = ScheduleConfig(name="Test Schedule", listing_type=ListingType.RENT)

        # Identity checks are kept explicit; equality alone would be weaker.
        assert cfg.enabled is True
        assert cfg.furnish_types is None

        expected = {
            "name": "Test Schedule",
            "minute": "0",
            "hour": "2",
            "day_of_week": "*",
            "listing_type": ListingType.RENT,
            "min_bedrooms": 1,
            "max_bedrooms": 999,
            "min_price": 0,
            "max_price": 10_000_000,
            "district_names": [],
        }
        for field, value in expected.items():
            # The field name is the assertion message so a failure says which one.
            assert getattr(cfg, field) == value, field

    def test_full_creation(self):
        """Every field is supplied explicitly and must be stored as given."""
        supplied = {
            "name": "Full Schedule",
            "enabled": False,
            "minute": "30",
            "hour": "4",
            "day_of_week": "1,3,5",
            "listing_type": ListingType.BUY,
            "min_bedrooms": 2,
            "max_bedrooms": 3,
            "min_price": 400000,
            "max_price": 800000,
            "district_names": ["Westminster", "Camden"],
            "furnish_types": ["furnished", "unfurnished"],
        }

        cfg = ScheduleConfig(**supplied)

        # ``enabled`` must be the actual ``False`` singleton, not merely falsy.
        assert cfg.enabled is False
        for field, value in supplied.items():
            assert getattr(cfg, field) == value, field

    def test_to_query_parameters(self):
        """``to_query_parameters`` carries the schedule's search filters across."""
        params = ScheduleConfig(
            name="Test",
            listing_type=ListingType.RENT,
            min_bedrooms=2,
            max_bedrooms=3,
            min_price=2000,
            max_price=4000,
            district_names=["Westminster"],
            furnish_types=["furnished"],
        ).to_query_parameters()

        assert params.listing_type == ListingType.RENT
        assert params.min_bedrooms == 2
        assert params.max_bedrooms == 3
        assert params.min_price == 2000
        assert params.max_price == 4000
        # District names are expected as a set, furnish types as enum members.
        assert params.district_names == {"Westminster"}
        assert params.furnish_types == [FurnishType.FURNISHED]

    def test_to_query_parameters_no_furnish_types(self):
        """An unset ``furnish_types`` stays ``None`` after conversion."""
        cfg = ScheduleConfig(name="Test", listing_type=ListingType.BUY)

        assert cfg.to_query_parameters().furnish_types is None
|
||||
|
||||
|
||||
class TestCronValidation:
    """Validation of the ``minute``, ``hour`` and ``day_of_week`` cron fields."""

    @staticmethod
    def _schedule_with(**cron_fields):
        """Build a RENT schedule named "Test" with the given cron field overrides."""
        return ScheduleConfig(name="Test", listing_type=ListingType.RENT, **cron_fields)

    # --- minute -----------------------------------------------------------

    @pytest.mark.parametrize("minute", ["0", "59", "*", "*/5", "*/15", "0,15,30,45"])
    def test_valid_minute(self, minute: str):
        """Accepted minute expressions are stored unchanged."""
        assert self._schedule_with(minute=minute).minute == minute

    @pytest.mark.parametrize("minute", ["60", "-1", "abc", "*/0"])
    def test_invalid_minute(self, minute: str):
        """Rejected minute expressions raise ``ValidationError``."""
        with pytest.raises(ValidationError):
            self._schedule_with(minute=minute)

    # --- hour -------------------------------------------------------------

    @pytest.mark.parametrize("hour", ["0", "23", "*", "*/6", "0,6,12,18"])
    def test_valid_hour(self, hour: str):
        """Accepted hour expressions are stored unchanged."""
        assert self._schedule_with(hour=hour).hour == hour

    @pytest.mark.parametrize("hour", ["24", "-1", "abc", "*/0"])
    def test_invalid_hour(self, hour: str):
        """Rejected hour expressions raise ``ValidationError``."""
        with pytest.raises(ValidationError):
            self._schedule_with(hour=hour)

    # --- day_of_week ------------------------------------------------------

    @pytest.mark.parametrize("day_of_week", ["0", "6", "*", "1,3,5", "*/2"])
    def test_valid_day_of_week(self, day_of_week: str):
        """Accepted day-of-week expressions are stored unchanged."""
        assert self._schedule_with(day_of_week=day_of_week).day_of_week == day_of_week

    @pytest.mark.parametrize("day_of_week", ["7", "-1", "abc", "*/0"])
    def test_invalid_day_of_week(self, day_of_week: str):
        """Rejected day-of-week expressions raise ``ValidationError``."""
        with pytest.raises(ValidationError):
            self._schedule_with(day_of_week=day_of_week)
|
||||
|
||||
|
||||
class TestSchedulesConfig:
    """Tests for the ``SchedulesConfig`` container.

    Covers loading from the ``SCRAPE_SCHEDULES`` environment variable via
    ``SchedulesConfig.from_env`` and filtering with ``get_enabled_schedules``.
    """

    def test_from_env_empty(self):
        """An empty ``SCRAPE_SCHEDULES`` value yields no schedules."""
        with mock.patch.dict(os.environ, {"SCRAPE_SCHEDULES": ""}, clear=False):
            config = SchedulesConfig.from_env()
            assert config.schedules == []

    def test_from_env_missing(self):
        """An unset ``SCRAPE_SCHEDULES`` yields no schedules."""
        # clear=True empties os.environ for the duration of the patch, so the
        # variable is guaranteed to be absent without any further removal.
        with mock.patch.dict(os.environ, {}, clear=True):
            config = SchedulesConfig.from_env()
            assert config.schedules == []

    def test_from_env_valid_single(self):
        """A JSON array with one entry loads as a single schedule."""
        json_config = '[{"name":"Daily RENT","listing_type":"RENT","hour":"2"}]'
        with mock.patch.dict(os.environ, {"SCRAPE_SCHEDULES": json_config}):
            config = SchedulesConfig.from_env()

        assert len(config.schedules) == 1
        assert config.schedules[0].name == "Daily RENT"
        assert config.schedules[0].listing_type == ListingType.RENT
        assert config.schedules[0].hour == "2"

    def test_from_env_valid_multiple(self):
        """A JSON array with several entries loads them in order."""
        json_config = """[
            {"name":"Daily RENT","listing_type":"RENT","hour":"2"},
            {"name":"Daily BUY","listing_type":"BUY","hour":"4","enabled":false}
        ]"""
        with mock.patch.dict(os.environ, {"SCRAPE_SCHEDULES": json_config}):
            config = SchedulesConfig.from_env()

        assert len(config.schedules) == 2
        assert config.schedules[0].name == "Daily RENT"
        # "enabled" is omitted from the first entry, so it must default to True.
        assert config.schedules[0].enabled is True
        assert config.schedules[1].name == "Daily BUY"
        assert config.schedules[1].enabled is False

    def test_from_env_invalid_json(self):
        """Unparseable JSON raises ``ValueError`` mentioning "Invalid JSON"."""
        with (
            mock.patch.dict(os.environ, {"SCRAPE_SCHEDULES": "not json"}),
            pytest.raises(ValueError, match="Invalid JSON"),
        ):
            SchedulesConfig.from_env()

    def test_from_env_not_array(self):
        """A JSON object instead of an array raises ``ValueError``."""
        with (
            mock.patch.dict(os.environ, {"SCRAPE_SCHEDULES": '{"name":"test"}'}),
            pytest.raises(ValueError, match="must be a JSON array"),
        ):
            SchedulesConfig.from_env()

    def test_from_env_invalid_schedule(self):
        """An entry that fails model validation raises ``ValidationError``."""
        # Missing required listing_type
        json_config = '[{"name":"Invalid"}]'
        with (
            mock.patch.dict(os.environ, {"SCRAPE_SCHEDULES": json_config}),
            pytest.raises(ValidationError),
        ):
            SchedulesConfig.from_env()

    def test_get_enabled_schedules(self):
        """Only enabled schedules are returned, in their original order."""
        config = SchedulesConfig(
            schedules=[
                ScheduleConfig(name="Enabled", listing_type=ListingType.RENT, enabled=True),
                ScheduleConfig(name="Disabled", listing_type=ListingType.BUY, enabled=False),
                ScheduleConfig(name="Also Enabled", listing_type=ListingType.RENT, enabled=True),
            ]
        )

        enabled = config.get_enabled_schedules()

        assert len(enabled) == 2
        assert enabled[0].name == "Enabled"
        assert enabled[1].name == "Also Enabled"

    def test_get_enabled_schedules_all_disabled(self):
        """With every schedule disabled the result is empty."""
        config = SchedulesConfig(
            schedules=[
                ScheduleConfig(name="Disabled1", listing_type=ListingType.RENT, enabled=False),
                ScheduleConfig(name="Disabled2", listing_type=ListingType.BUY, enabled=False),
            ]
        )

        enabled = config.get_enabled_schedules()

        assert len(enabled) == 0
|
||||
Loading…
Add table
Add a link
Reference in a new issue