diff --git a/crawler/.env.sample b/crawler/.env.sample index 1e7fe00..567d519 100644 --- a/crawler/.env.sample +++ b/crawler/.env.sample @@ -6,12 +6,3 @@ export ROUTING_API_KEY="" # fetch from https://console.cloud.google.c export DB_CONNECTION_STRING="sqlite:///data/wrongmove.db" # by default use SQLite locally export CELERY_BROKER_URL="redis://localhost:6379/0" # processing background tasks export CELERY_RESULT_BACKEND="redis://localhost:6379/1" - -# Periodic scraping schedules (JSON array) -# Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types -# Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday) -# Example: -# SCRAPE_SCHEDULES='[{"name":"Daily RENT","listing_type":"RENT","hour":"2","min_bedrooms":2,"max_bedrooms":3,"min_price":2000,"max_price":4000}]' -# Multiple schedules: -# SCRAPE_SCHEDULES='[{"name":"RENT 2am","listing_type":"RENT","hour":"2"},{"name":"BUY 4am","listing_type":"BUY","hour":"4"}]' -SCRAPE_SCHEDULES= diff --git a/crawler/alembic/versions/e5f1bc4e3323_fix_typo_in_logitude_column.py b/crawler/alembic/versions/e5f1bc4e3323_fix_typo_in_logitude_column.py deleted file mode 100644 index 190b5f8..0000000 --- a/crawler/alembic/versions/e5f1bc4e3323_fix_typo_in_logitude_column.py +++ /dev/null @@ -1,106 +0,0 @@ -"""fix typo in logitude column - -Revision ID: e5f1bc4e3323 -Revises: 8220f657bae5 -Create Date: 2025-10-18 20:31:29.558034 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision: str = 'e5f1bc4e3323' -down_revision: Union[str, None] = '8220f657bae5' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - """Upgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - op.drop_index(op.f('ix_user_email'), table_name='user') - op.drop_table('user') - op.drop_index(op.f('ix_rentlisting_last_seen'), table_name='rentlisting') - op.drop_index(op.f('ix_rentlisting_number_of_bedrooms'), table_name='rentlisting') - op.drop_index(op.f('ix_rentlisting_price'), table_name='rentlisting') - op.drop_index(op.f('ix_rentlisting_square_meters'), table_name='rentlisting') - op.drop_table('rentlisting') - op.drop_index(op.f('ix_buylisting_last_seen'), table_name='buylisting') - op.drop_index(op.f('ix_buylisting_number_of_bedrooms'), table_name='buylisting') - op.drop_index(op.f('ix_buylisting_price'), table_name='buylisting') - op.drop_index(op.f('ix_buylisting_square_meters'), table_name='buylisting') - op.drop_table('buylisting') - # ### end Alembic commands ### - - -def downgrade() -> None: - """Downgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('buylisting', - sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False), - sa.Column('price', mysql.FLOAT(), nullable=False), - sa.Column('number_of_bedrooms', mysql.INTEGER(), autoincrement=False, nullable=False), - sa.Column('square_meters', mysql.FLOAT(), nullable=True), - sa.Column('agency', mysql.VARCHAR(length=255), nullable=True), - sa.Column('council_tax_band', mysql.VARCHAR(length=255), nullable=True), - sa.Column('longtitude', mysql.FLOAT(), nullable=False), - sa.Column('latitude', mysql.FLOAT(), nullable=False), - sa.Column('price_history_json', mysql.TEXT(), nullable=False), - sa.Column('listing_site', mysql.ENUM('RIGHTMOVE'), nullable=False), - sa.Column('last_seen', mysql.DATETIME(), nullable=False), - sa.Column('photo_thumbnail', mysql.VARCHAR(length=255), nullable=True), - sa.Column('floorplan_image_paths', mysql.JSON(), nullable=False), - sa.Column('additional_info', mysql.JSON(), nullable=False), - sa.Column('routing_info_json', mysql.TEXT(), nullable=True), - sa.Column('service_charge', mysql.FLOAT(), nullable=True), - sa.Column('lease_left', mysql.INTEGER(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id'), - mysql_collate='utf8mb4_0900_ai_ci', - mysql_default_charset='utf8mb4', - mysql_engine='InnoDB' - ) - op.create_index(op.f('ix_buylisting_square_meters'), 'buylisting', ['square_meters'], unique=False) - op.create_index(op.f('ix_buylisting_price'), 'buylisting', ['price'], unique=False) - op.create_index(op.f('ix_buylisting_number_of_bedrooms'), 'buylisting', ['number_of_bedrooms'], unique=False) - op.create_index(op.f('ix_buylisting_last_seen'), 'buylisting', ['last_seen'], unique=False) - op.create_table('rentlisting', - sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False), - sa.Column('price', mysql.FLOAT(), nullable=False), - sa.Column('number_of_bedrooms', mysql.INTEGER(), autoincrement=False, nullable=False), - sa.Column('square_meters', mysql.FLOAT(), nullable=True), - sa.Column('agency', mysql.VARCHAR(length=255), nullable=True), - sa.Column('council_tax_band', mysql.VARCHAR(length=255), nullable=True), - sa.Column('longtitude', mysql.FLOAT(), nullable=False), - sa.Column('latitude', mysql.FLOAT(), nullable=False), - sa.Column('price_history_json', mysql.TEXT(), nullable=False), - sa.Column('listing_site', mysql.ENUM('RIGHTMOVE'), nullable=False), - sa.Column('last_seen', mysql.DATETIME(), nullable=False), - sa.Column('photo_thumbnail', mysql.VARCHAR(length=255), nullable=True), - sa.Column('floorplan_image_paths', mysql.JSON(), nullable=False), - sa.Column('additional_info', mysql.JSON(), nullable=False), - sa.Column('routing_info_json', mysql.TEXT(), nullable=True), - sa.Column('available_from', mysql.DATETIME(), nullable=True), - sa.Column('furnish_type', mysql.ENUM('FURNISHED', 'UNFURNISHED', 'PART_FURNISHED', 'ASK_LANDLORD', 'UNKNOWN'), nullable=False), - sa.PrimaryKeyConstraint('id'), - mysql_collate='utf8mb4_0900_ai_ci', - mysql_default_charset='utf8mb4', - mysql_engine='InnoDB' - ) - op.create_index(op.f('ix_rentlisting_square_meters'), 'rentlisting', ['square_meters'], unique=False) - op.create_index(op.f('ix_rentlisting_price'), 'rentlisting', ['price'], unique=False) - op.create_index(op.f('ix_rentlisting_number_of_bedrooms'), 'rentlisting', ['number_of_bedrooms'], unique=False) - op.create_index(op.f('ix_rentlisting_last_seen'), 'rentlisting', ['last_seen'], unique=False) - op.create_table('user', - sa.Column('id', mysql.INTEGER(), autoincrement=True, nullable=False), - sa.Column('email', mysql.VARCHAR(length=255), nullable=False), - sa.Column('password', mysql.VARCHAR(length=255), nullable=False), - sa.PrimaryKeyConstraint('id'), - mysql_collate='utf8mb4_0900_ai_ci', - mysql_default_charset='utf8mb4', - mysql_engine='InnoDB' - ) - op.create_index(op.f('ix_user_email'), 'user', ['email'], unique=True) - # ### end Alembic commands ### diff --git a/crawler/api/app.py b/crawler/api/app.py index c894b19..55d172a 100644 --- a/crawler/api/app.py +++ b/crawler/api/app.py @@ -1,25 +1,30 @@ -"""FastAPI application for the Real Estate Crawler API.""" from datetime import datetime, timedelta import json import logging import logging.config -from typing import Annotated, Optional +from typing import Annotated from api.auth import get_current_user from api.config import DEV_TIER_ORIGINS, PROD_TIER_ORIGINS from dotenv import load_dotenv from fastapi import Depends, FastAPI, Query -from fastapi.responses import StreamingResponse from api.auth import User -from models.listing import QueryParameters, ListingType, FurnishType +from models.listing import QueryParameters from notifications import send_notification +from rec import districts +from redis_repository import RedisRepository from repositories.listing_repository import ListingRepository from database import engine from fastapi.middleware.cors import CORSMiddleware -from ui_exporter import convert_to_geojson_feature, convert_row_to_geojson -from services import listing_service, export_service, district_service, task_service +from tasks import listing_tasks +from ui_exporter import export_immoweb +from alembic import command +from alembic.config import Config +from contextlib import asynccontextmanager +from celery.exceptions import TaskRevokedError +from celery_app import app as celery_app from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor -from api.metrics import metrics_app +from api.metrics import metrics_app # Import the Prometheus ASGI app from opentelemetry.metrics import get_meter @@ -27,35 +32,17 @@ load_dotenv() logger = logging.getLogger("uvicorn") -def get_query_parameters( - listing_type: ListingType, - min_bedrooms: int = 1, - max_bedrooms: int = 999, - min_price: int = 0, - max_price: int = 10_000_000, - min_sqm: Optional[int] = None, - last_seen_days: Optional[int] = None, - let_date_available_from: Optional[datetime] = None, - furnish_types: Optional[str] = None, # comma-separated list -) -> QueryParameters: - """Parse query parameters into QueryParameters model.""" - parsed_furnish_types = None - if furnish_types: - parsed_furnish_types = [FurnishType(f.strip()) for f in furnish_types.split(",")] - - return QueryParameters( - listing_type=listing_type, - min_bedrooms=min_bedrooms, - max_bedrooms=max_bedrooms, - min_price=min_price, - max_price=max_price, - min_sqm=min_sqm, - last_seen_days=last_seen_days, - let_date_available_from=let_date_available_from, - furnish_types=parsed_furnish_types, - ) +# @asynccontextmanager +# async def lifespan(app: FastAPI): +# alembic_cfg = Config("./alembic.ini") +# logger.info("Running alembic migrations") +# command.upgrade(alembic_cfg, "head") +# logger.info("Finished running alembic migrations") +# yield +# logger.warning("Shutting down") +# app = FastAPI(lifespan=lifespan) app = FastAPI() app.mount("/metrics", metrics_app) meter = get_meter(__name__) @@ -79,121 +66,52 @@ app.add_middleware( @app.get("/api/status") -async def get_status() -> dict[str, str]: +async def get_status(): request_counter.add(1, {"method": "GET", "path": "/status"}) hist.record(1.5, {"method": "GET", "path": "/status"}) return {"status": "OK"} @app.get("/api/listing") -async def get_listing( - user: Annotated[User, Depends(get_current_user)], - limit: int = 5, -) -> dict[str, list]: - """Get listings from the database.""" +async def get_listing(user: Annotated[User, Depends(get_current_user)]): repository = ListingRepository(engine) - result = await listing_service.get_listings(repository, limit=limit) - logger.info(f"Fetched {result.total_count} listings for {user.email}") - return {"listings": result.listings} + listings = await repository.get_listings(limit=5) + logger.info(f"Fetched {len(listings)} listings") + return {"listings": listings} @app.get("/api/listing_geojson") async def get_listing_geojson( user: Annotated[User, Depends(get_current_user)], - query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)], - limit: int = 1000, # Default limit to prevent timeout -) -> dict: - """Get listings as GeoJSON for map display.""" + query_parameters: Annotated[QueryParameters, Query()], +): repository = ListingRepository(engine) - result = await export_service.export_to_geojson( - repository, - query_parameters=query_parameters, - limit=limit, - ) - return result.data - - -@app.get("/api/listing_geojson/stream") -async def stream_listing_geojson( - user: Annotated[User, Depends(get_current_user)], - query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)], - batch_size: int = 50, - limit: int = 1000, -) -> StreamingResponse: - """Stream listings as NDJSON for progressive map loading. - - Returns newline-delimited JSON with three message types: - - metadata: Initial message with batch_size and total_expected count - - batch: Array of GeoJSON features - - complete: Final message with total count - """ - async def generate(): - repository = ListingRepository(engine) - - # Phase 1: Fast count for progress estimation - total = repository.count_listings(query_parameters) - effective_total = min(limit, total) if limit else total - - yield json.dumps({ - "type": "metadata", - "batch_size": batch_size, - "total_expected": effective_total, - }) + "\n" - - # Phase 2: Stream with column projection and keyset pagination - count = 0 - batch = [] - for row in repository.stream_listings_optimized( - query_parameters, limit=limit, page_size=batch_size - ): - feature = convert_row_to_geojson(row) - batch.append(feature) - count += 1 - - if len(batch) >= batch_size: - yield json.dumps({"type": "batch", "features": batch}) + "\n" - batch = [] - - # Send remaining - if batch: - yield json.dumps({"type": "batch", "features": batch}) + "\n" - - # Final message - yield json.dumps({"type": "complete", "total": count}) + "\n" - - return StreamingResponse( - generate(), - media_type="application/x-ndjson", - headers={ - "Cache-Control": "no-cache", - "X-Accel-Buffering": "no", # Disable nginx buffering - } + geojson_data = await export_immoweb( + repository, query_parameters=query_parameters, limit=None ) + return geojson_data @app.post("/api/refresh_listings") async def refresh_listings( user: Annotated[User, Depends(get_current_user)], - query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)], + query_parameters: Annotated[QueryParameters, Query()], ) -> dict[str, str]: - """Trigger a background task to refresh listings.""" await send_notification( f"{user.email} refreshing listings with query parameters {query_parameters.model_dump_json()}" ) - - repository = ListingRepository(engine) - result = await listing_service.refresh_listings( - repository, - query_parameters, - async_mode=True, - user_email=user.email, + # await listing_tasks.async_dump_listings_task(query_parameters.model_dump_json()) # Use this for local debugging - run task in sync + # return {} + # TODO: rate limit + expiry_time = datetime.now() + timedelta(minutes=10) + task = listing_tasks.dump_listings_task.apply_async( + args=(query_parameters.model_dump_json(),), + expires=expiry_time, ) - # Track task for user - if result.task_id: - task_service.add_task_for_user(user.email, result.task_id) - - return {"task_id": result.task_id or "", "message": result.message} + redis_repository = RedisRepository.instance() + redis_repository.add_task_for_user(user, task.id) + return {"task_id": task.id} @app.get("/api/task_status") @@ -201,12 +119,16 @@ async def get_task_status( user: Annotated[User, Depends(get_current_user)], task_id: str, ) -> dict[str, str]: - """Get the status of a background task.""" - status = task_service.get_task_status(task_id) + task_result = listing_tasks.dump_listings_task.AsyncResult(task_id) + try: + result = json.dumps(task_result.result) + except Exception: + result = str(task_result.result) + return { - "task_id": status.task_id, - "status": status.status, - "result": json.dumps(status.result) if status.result else "", + "task_id": task_id, + "status": task_result.status, + "result": result, } @@ -214,36 +136,16 @@ async def get_task_status( async def get_tasks_for_user( user: Annotated[User, Depends(get_current_user)], ) -> list[str]: - """Get all task IDs for the current user.""" - return task_service.get_user_tasks(user.email) - - -@app.post("/api/cancel_task") -async def cancel_task( - user: Annotated[User, Depends(get_current_user)], - task_id: str = Query(..., description="The task ID to cancel"), -) -> dict[str, str | bool]: - """Cancel a running task.""" - # Verify user owns this task - user_tasks = task_service.get_user_tasks(user.email) - if task_id not in user_tasks: - return {"success": False, "message": "Task not found or not owned by user"} - - try: - task_service.cancel_task(task_id) - logger.info(f"Task {task_id} cancelled by {user.email}") - return {"success": True, "message": "Task cancelled"} - except Exception as e: - logger.error(f"Failed to cancel task {task_id}: {e}") - return {"success": False, "message": str(e)} + redis_repository = RedisRepository.instance() + user_tasks = redis_repository.get_tasks_for_user(user) + return user_tasks @app.get("/api/get_districts") async def get_districts( user: Annotated[User, Depends(get_current_user)], ) -> dict[str, str]: - """Get all available districts.""" - return district_service.get_all_districts() + return districts.get_districts() FastAPIInstrumentor.instrument_app(app) diff --git a/crawler/config/__init__.py b/crawler/config/__init__.py deleted file mode 100644 index 315e8c3..0000000 --- a/crawler/config/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Configuration modules.""" -from config.schedule_config import ScheduleConfig, SchedulesConfig - -__all__ = ["ScheduleConfig", "SchedulesConfig"] diff --git a/crawler/config/schedule_config.py b/crawler/config/schedule_config.py deleted file mode 100644 index cd00ebf..0000000 --- a/crawler/config/schedule_config.py +++ /dev/null @@ -1,122 +0,0 @@ -"""Schedule configuration for periodic scraping tasks.""" -from __future__ import annotations - -import json -import logging -import os -import re -from typing import Self - -from pydantic import BaseModel, field_validator - -from models.listing import FurnishType, ListingType, QueryParameters - -logger = logging.getLogger("uvicorn.error") - -# Cron field validation patterns -CRON_MINUTE_PATTERN = re.compile(r"^(\*|([0-5]?\d)(,[0-5]?\d)*|\*/[1-9]\d*)$") -CRON_HOUR_PATTERN = re.compile(r"^(\*|(1?\d|2[0-3])(,(1?\d|2[0-3]))*|\*/[1-9]\d*)$") -CRON_DAY_OF_WEEK_PATTERN = re.compile(r"^(\*|[0-6](,[0-6])*|\*/[1-6])$") - - -class ScheduleConfig(BaseModel): - """Configuration for a single periodic scrape schedule.""" - - name: str - enabled: bool = True - minute: str = "0" - hour: str = "2" - day_of_week: str = "*" - listing_type: ListingType - min_bedrooms: int = 1 - max_bedrooms: int = 999 - min_price: int = 0 - max_price: int = 10_000_000 - district_names: list[str] = [] - furnish_types: list[str] | None = None - - @field_validator("minute") - @classmethod - def validate_minute(cls, v: str) -> str: - """Validate cron minute field (0-59, *, or */N).""" - if not CRON_MINUTE_PATTERN.match(v): - raise ValueError( - f"Invalid cron minute '{v}'. Must be 0-59, *, */N, or comma-separated values." - ) - return v - - @field_validator("hour") - @classmethod - def validate_hour(cls, v: str) -> str: - """Validate cron hour field (0-23, *, or */N).""" - if not CRON_HOUR_PATTERN.match(v): - raise ValueError( - f"Invalid cron hour '{v}'. Must be 0-23, *, */N, or comma-separated values." - ) - return v - - @field_validator("day_of_week") - @classmethod - def validate_day_of_week(cls, v: str) -> str: - """Validate cron day_of_week field (0-6, *, or */N).""" - if not CRON_DAY_OF_WEEK_PATTERN.match(v): - raise ValueError( - f"Invalid cron day_of_week '{v}'. Must be 0-6, *, */N, or comma-separated values." - ) - return v - - def to_query_parameters(self) -> QueryParameters: - """Convert schedule config to QueryParameters for the scrape task.""" - furnish_types_enum: list[FurnishType] | None = None - if self.furnish_types: - furnish_types_enum = [FurnishType(ft) for ft in self.furnish_types] - - return QueryParameters( - listing_type=self.listing_type, - min_bedrooms=self.min_bedrooms, - max_bedrooms=self.max_bedrooms, - min_price=self.min_price, - max_price=self.max_price, - district_names=set(self.district_names), - furnish_types=furnish_types_enum, - ) - - -class SchedulesConfig(BaseModel): - """Container for multiple schedule configurations.""" - - schedules: list[ScheduleConfig] = [] - - @classmethod - def from_env(cls, env_var: str = "SCRAPE_SCHEDULES") -> Self: - """Load schedules from environment variable. - - Args: - env_var: Name of the environment variable containing JSON config. - - Returns: - SchedulesConfig instance with parsed schedules. - - Raises: - ValueError: If the JSON is invalid or schedule validation fails. - """ - raw_value = os.environ.get(env_var, "").strip() - - if not raw_value: - logger.info(f"No {env_var} configured, no periodic scrapes will be scheduled") - return cls(schedules=[]) - - try: - parsed = json.loads(raw_value) - except json.JSONDecodeError as e: - raise ValueError(f"Invalid JSON in {env_var}: {e}") from e - - if not isinstance(parsed, list): - raise ValueError(f"{env_var} must be a JSON array") - - schedules = [ScheduleConfig.model_validate(item) for item in parsed] - return cls(schedules=schedules) - - def get_enabled_schedules(self) -> list[ScheduleConfig]: - """Return only enabled schedules.""" - return [s for s in self.schedules if s.enabled] diff --git a/crawler/docker-compose.yml b/crawler/docker-compose.yml deleted file mode 100644 index b071423..0000000 --- a/crawler/docker-compose.yml +++ /dev/null @@ -1,105 +0,0 @@ -version: "3.8" - -services: - redis: - image: redis:8 - container_name: rec-redis - ports: - - "6379:6379" - volumes: - - redis_data:/data - command: ["redis-server", "--appendonly", "yes"] - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 5s - timeout: 3s - retries: 5 - - mysql: - image: mysql:9 - container_name: rec-mysql - ports: - - "3306:3306" - environment: - MYSQL_ROOT_PASSWORD: rootpass - MYSQL_DATABASE: wrongmove - MYSQL_USER: wrongmove - MYSQL_PASSWORD: wrongmove - volumes: - - mysql_data:/var/lib/mysql - healthcheck: - test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] - interval: 10s - timeout: 5s - retries: 5 - - app: - build: - context: . - dockerfile: Dockerfile - container_name: rec-app - ports: - - "5001:5001" - volumes: - # Bind mount source code for development - - .:/app - # Preserve virtual environment in container - - app_venv:/app/.venv - environment: - - ENV=dev - - DB_CONNECTION_STRING=mysql://wrongmove:wrongmove@mysql:3306/wrongmove - - CELERY_BROKER_URL=redis://redis:6379/0 - - CELERY_RESULT_BACKEND=redis://redis:6379/0 - - ROUTING_API_KEY=${ROUTING_API_KEY:-} - depends_on: - redis: - condition: service_healthy - mysql: - condition: service_healthy - command: ["uvicorn", "api.app:app", "--host", "0.0.0.0", "--port", "5001", "--reload", "--reload-dir", "api", "--reload-dir", "services", "--reload-dir", "repositories", "--reload-dir", "models"] - - celery: - build: - context: . - dockerfile: Dockerfile - container_name: rec-celery - volumes: - - .:/app - - app_venv:/app/.venv - environment: - - ENV=dev - - DB_CONNECTION_STRING=mysql://wrongmove:wrongmove@mysql:3306/wrongmove - - CELERY_BROKER_URL=redis://redis:6379/0 - - CELERY_RESULT_BACKEND=redis://redis:6379/0 - - ROUTING_API_KEY=${ROUTING_API_KEY:-} - - SCRAPE_SCHEDULES=${SCRAPE_SCHEDULES:-} - depends_on: - redis: - condition: service_healthy - mysql: - condition: service_healthy - command: ["celery", "-A", "celery_app", "worker", "--loglevel=info"] - - celery-beat: - build: - context: . - dockerfile: Dockerfile - container_name: rec-celery-beat - volumes: - - .:/app - - app_venv:/app/.venv - environment: - - ENV=dev - - DB_CONNECTION_STRING=mysql://wrongmove:wrongmove@mysql:3306/wrongmove - - CELERY_BROKER_URL=redis://redis:6379/0 - - CELERY_RESULT_BACKEND=redis://redis:6379/0 - - SCRAPE_SCHEDULES=${SCRAPE_SCHEDULES:-} - depends_on: - - redis - - celery - command: ["celery", "-A", "celery_app", "beat", "--loglevel=info"] - -volumes: - redis_data: - mysql_data: - app_venv: diff --git a/crawler/frontend/src/App.css b/crawler/frontend/src/App.css index 77e7d11..5efa2df 100644 --- a/crawler/frontend/src/App.css +++ b/crawler/frontend/src/App.css @@ -1,14 +1,50 @@ #root { - margin: 0; - padding: 0; - height: 100%; - overflow: hidden; + margin: 0 auto; + padding: 2rem; + text-align: center; +} + +.logo { + height: 6em; + padding: 1.5em; + will-change: filter; + transition: filter 300ms; +} + +.logo:hover { + filter: drop-shadow(0 0 2em #646cffaa); +} + +.logo.react:hover { + filter: drop-shadow(0 0 2em #61dafbaa); +} + +@keyframes logo-spin { + from { + transform: rotate(0deg); + } + + to { + transform: rotate(360deg); + } +} + +@media (prefers-reduced-motion: no-preference) { + a:nth-of-type(2) .logo { + animation: logo-spin infinite 20s linear; + } +} + +.card { + padding: 2em; +} + +.read-the-docs { + color: #888; } html, body { overflow: hidden; height: 100%; - margin: 0; - padding: 0; } diff --git a/crawler/frontend/src/App.tsx b/crawler/frontend/src/App.tsx index 08be85c..621947b 100644 --- a/crawler/frontend/src/App.tsx +++ b/crawler/frontend/src/App.tsx @@ -1,38 +1,94 @@ import type { User } from 'oidc-client-ts'; -import { useEffect, useState, useRef, useCallback } from 'react'; +import { useEffect, useState } from 'react'; import './App.css'; -import { getUser, handleCallback } from './auth/authService'; +import { AppSidebar } from './AppSidebar'; +import { getUser, handleCallback, logout } from './auth/authService'; +import ActiveQuery from './components/ActiveQuery'; import AlertError from './components/AlertError'; import LoginModal from './components/LoginModal'; import { Map } from './components/Map'; -import { FilterPanel, type ParameterValues, DEFAULT_FILTER_VALUES } from './components/FilterPanel'; -import { Header } from './components/Header'; -import { StatsBar, type ViewMode } from './components/StatsBar'; -import { ListView } from './components/ListView'; -import { StreamingProgressBar } from './components/StreamingProgressBar'; -import { Sheet, SheetContent, SheetTrigger } from './components/ui/sheet'; +import { Parameters, type ParameterValues } from './components/Parameters'; +import { Spinner } from './components/Spinner'; +import { Breadcrumb, BreadcrumbItem, BreadcrumbLink, BreadcrumbList, BreadcrumbPage, BreadcrumbSeparator } from './components/ui/breadcrumb'; import { Button } from './components/ui/button'; -import { Filter } from 'lucide-react'; -import type { GeoJSONFeatureCollection, PropertyProperties, PropertyFeature } from '@/types'; -import { refreshListings, fetchTasksForUser, streamListingGeoJSON, type StreamingProgress } from '@/services'; +import { Separator } from './components/ui/separator'; +import { SidebarInset, SidebarProvider, SidebarTrigger } from './components/ui/sidebar'; + +const fetchData = async (user: User, baseQueyrUri: string, parameters: ParameterValues, method: string = 'GET') => { + const accessToken = user.access_token; + const queryString = new URLSearchParams(); + queryString.append('listing_type', parameters.listing_type) + if (parameters.min_bedrooms) { + queryString.append('min_bedrooms', parameters.min_bedrooms.toString()); + } + if (parameters.max_bedrooms) { + queryString.append('max_bedrooms', parameters.max_bedrooms.toString()) + } + if (parameters.max_price) { + queryString.append("max_price", parameters.max_price.toString()); + } + if (parameters.min_price) { + queryString.append("min_price", parameters.min_price.toString()); + } + if (parameters.min_sqm) { + queryString.append("min_sqm", parameters.min_sqm.toString()); + } + if (parameters.last_seen_days) { + queryString.append("last_seen_days", parameters.last_seen_days.toString()); + } + if (parameters.available_from) { + queryString.append("let_date_available_from", parameters.available_from.toISOString()); + } + if (parameters.district) { + queryString.append("district_names", parameters.district); + } + + const response = await fetch(baseQueyrUri + '?' + queryString, + { + method: method, + headers: { + 'Authorization': `Bearer ${accessToken}`, // Pass the token + 'Content-Type': 'application/json', + }, + } + ); + if (!response.ok) { + throw new Error('Error: ' + response.status); + } + const data: Response = await response.json(); + return data; +}; + + +const fetchActiveTasksForUser = async (user: User) => { + const accessToken = user?.access_token; + const response = await fetch(`/api/tasks_for_user`, { + method: 'GET', + headers: { + 'Authorization': `Bearer ${accessToken}`, // Pass the token + 'Content-Type': 'application/json', + }, + }); + if (!response.ok) { + throw new Error(`Failed to fetch active tasks for user: ${response.status}`); + } + + const data = + await response.json(); + return data; +}; + + function App() { - const [listingData, setListingData] = useState(null); + const [listingData, setListingData] = useState({}); const [taskID, setTaskID] = useState(null); const [user, setUser] = useState(null); + const [isParametersModalOpen, setIsParametersModalOpen] = useState(true); const [queryParameters, setQueryParameters] = useState(null); const [submitError, setSubmitError] = useState(null); const [alertDialogIsOpen, setAlertDialogIsOpen] = useState(false); - const [isLoading, setIsLoading] = useState(false); - const [viewMode, setViewMode] = useState('map'); - const [mobileFilterOpen, setMobileFilterOpen] = useState(false); - const [highlightedProperty, setHighlightedProperty] = useState(null); - const [streamingProgress, setStreamingProgress] = useState(null); - - // Ref to track accumulated features during streaming - const accumulatedFeaturesRef = useRef([]); - // Ref to track if initial load has been triggered - const initialLoadTriggeredRef = useRef(false); + const [spinnerText, setSpinnerText] = useState(null); useEffect(() => { // Check if this is a callback from Authentik (after login) @@ -51,233 +107,101 @@ function App() { if (!user) { return; } - fetchTasksForUser(user).then((tasks) => { - if (tasks && tasks.length > 0) { - setTaskID(tasks[0]); + fetchActiveTasksForUser(user).then((tasks) => { + if (tasks) { + setTaskID(tasks[0]) } - }); - }, [user, taskID]); - - // Load listings function - used by both auto-load and manual submit - const loadListings = useCallback(async (parameters: ParameterValues) => { - if (!user) return; - - setQueryParameters(parameters); - setMobileFilterOpen(false); - setIsLoading(true); - accumulatedFeaturesRef.current = []; - setStreamingProgress({ count: 0 }); - setListingData(null); - - try { - for await (const batch of streamListingGeoJSON(user, parameters, (progress) => { - setStreamingProgress(progress); - })) { - accumulatedFeaturesRef.current.push(...batch); - setListingData({ - type: 'FeatureCollection', - features: [...accumulatedFeaturesRef.current] - }); - } - } catch (error) { - if (error instanceof Error) { - setSubmitError(error.message); - } else { - setSubmitError(String(error)); - } - setAlertDialogIsOpen(true); - } finally { - setIsLoading(false); - setStreamingProgress(null); - } - }, [user]); - - // Auto-load data with default filters when user is authenticated - useEffect(() => { - if (!user || initialLoadTriggeredRef.current) { - return; - } - initialLoadTriggeredRef.current = true; - - const defaultParams: ParameterValues = { - ...DEFAULT_FILTER_VALUES, - available_from: new Date(), - }; - - loadListings(defaultParams); - }, [user, loadListings]); + }) + }, [user, taskID]) if (!user) { - return ; + return } const onSubmit = async (action: 'fetch-data' | 'visualize', parameters: ParameterValues) => { + // Fetch listing data + setQueryParameters(parameters) + setIsParametersModalOpen(false) + let data = null; if (action === 'visualize') { - loadListings(parameters); - } else if (action === 'fetch-data') { - setQueryParameters(parameters); - setMobileFilterOpen(false); - setIsLoading(true); + setSpinnerText("Loading data for visualization...") try { - const data = await refreshListings(user!, parameters); - setTaskID(data.task_id); + data = await fetchData(user, "/api/listing_geojson", parameters); } catch (error) { - if (error instanceof Error) { - setSubmitError(error.message); - } else { - setSubmitError(String(error)); - } - setAlertDialogIsOpen(true); + // @ts-expect-error + setSubmitError(error.message) + setAlertDialogIsOpen(true) } finally { - setIsLoading(false); + setSpinnerText(null) + } + if (data) { + setListingData(data); + } + } else if (action === 'fetch-data') { + setSpinnerText("Submitting query to refresh listings...") + try { + data = await fetchData(user, "/api/refresh_listings", parameters, 'POST'); + // @ts-expect-error + setTaskID(data.task_id) + } catch (error) { + // @ts-expect-error + setSubmitError(error.message) + setAlertDialogIsOpen(true) + } finally { + setSpinnerText(null) } } - }; - - const handlePropertyClick = (property: PropertyProperties, _coordinates: [number, number]) => { - setHighlightedProperty(property.url); - // Optionally: pan map to coordinates - }; - - const renderMainContent = () => { - if (!listingData) { - return ( -
-
- {isLoading ? ( - <> -
🏠
-

Loading Properties...

-

- Fetching listings with default filters. You can adjust filters on the left. -

- - ) : ( - <> -
🏠
-

Welcome to Property Explorer

-

- Use the filters on the left to find properties. Apply filters to visualize existing data or refresh to fetch new listings. -

- - )} -
-
- ); - } - - if (listingData.features.length === 0) { - return ( -
-
-
🔍
-

No listings found

-

- Try adjusting the filters or run a data refresh to fetch new listings. -

-
-
- ); - } - - return ( - <> - {/* Map View */} - {(viewMode === 'map' || viewMode === 'split') && ( -
- -
- )} - - {/* List View */} - {(viewMode === 'list' || viewMode === 'split') && ( -
- -
- )} - - ); - }; - - const handleTaskCancelled = () => { - setTaskID(null); - }; + console.log(data) + } return ( -
- {/* Header */} -
+ <> + + + +
+ + + + + + + Building Your Application + + + + + Data Fetching + + + +
+
+
+

Welcome, {user.profile.email}!

+ + + + - {/* Main content area */} -
- {/* Filter Panel - Desktop (fixed sidebar) */} -
- -
+ + {spinnerText} + - {/* Filter Panel - Mobile (sheet) */} -
- - - - - - - - -
- - {/* Main View Area */} -
- {/* Streaming Progress Bar */} -
- -
- - {/* Map/List Container */} -
- {renderMainContent()} -
- - {/* Stats Bar */} - {listingData && listingData.features.length > 0 && ( -
-
- )} -
-
- - {/* Error Dialog */} - -
- ); + {Object.keys(listingData).length > 0 && +
+ +
+ } +
+
+
+ + ) } -export default App; +export default App diff --git a/crawler/frontend/src/assets/Map.css b/crawler/frontend/src/assets/Map.css index db47184..e65969f 100644 --- a/crawler/frontend/src/assets/Map.css +++ b/crawler/frontend/src/assets/Map.css @@ -1,80 +1,55 @@ #map-container { - position: absolute; + /* position: absolute; */ top: 0; bottom: 0; right: 0; left: 0; width: 100%; height: 100%; + flex: 1; + + /* position: 'relative'; */ + /* overflow: 'visible'; */ + /* isolation: 'isolate'; */ + } +.sidebar { + background-color: rgb(35 55 75 / 90%); + color: #fff; + padding: 6px 12px; + font-family: monospace; + z-index: 1; + position: absolute; + top: 0; + left: 0; + margin: 12px; + border-radius: 4px; +} + + #legend { - box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); line-height: 18px; - height: auto; - min-height: 300px; - width: 90px; - padding: 12px; + height: 310px; + width: 60px; + padding: 10px; position: absolute; - top: 10px; - right: 10px; - background: rgba(255, 255, 255, 0.95); - border-radius: 8px; - font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; -} - -#legend .axis path, -#legend .axis line { - stroke: #e5e7eb; -} - -#legend .axis text { - fill: #6b7280; + top: 0; + right: 0; + background: rgba(255, 255, 255, 0.8); + margin-top: 40px; + margin-right: 20px; + font-family: Arial, sans-serif; } .propertyListingPopupItem { - display: flex; + display: 'flex'; box-sizing: border-box; + border: 1px solid #aaa; justify-content: center; - font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; - padding: 6px; + font-family: sans-serif; + padding: 8px; width: 50%; -} - -/* Mapbox popup styling improvements */ -.mapboxgl-popup-content { - padding: 0 !important; - border-radius: 8px !important; - overflow: hidden; - box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15) !important; -} - -.mapboxgl-popup-close-button { - font-size: 20px; - padding: 4px 8px; - color: #6b7280; - z-index: 10; -} - -.mapboxgl-popup-close-button:hover { - background-color: #f3f4f6; - color: #111827; -} - -/* Improve marker visibility */ -.mapboxgl-marker { - cursor: pointer; -} - -/* Mobile adjustments */ -@media (max-width: 768px) { - #legend { - width: 75px; - padding: 8px; - min-height: 250px; - } - - .mapboxgl-popup-content { - max-width: 90vw !important; - } + /* 2 columns */ } diff --git a/crawler/frontend/src/components/ActiveQuery.tsx b/crawler/frontend/src/components/ActiveQuery.tsx index 9d5af53..b23f58e 100644 --- a/crawler/frontend/src/components/ActiveQuery.tsx +++ b/crawler/frontend/src/components/ActiveQuery.tsx @@ -1,128 +1,141 @@ import { getUser } from '@/auth/authService'; -import { POLLING_INTERVALS } from '@/constants'; -import { fetchTaskStatus, cancelTask } from '@/services'; -import { TaskStatus, type TaskResult } from '@/types'; import type { User } from 'oidc-client-ts'; import React, { useEffect, useState } from 'react'; import AlertError from './AlertError'; import { Spinner } from './Spinner'; import { HoverCard, HoverCardContent, HoverCardTrigger } from './ui/hover-card'; import { Progress } from './ui/progress'; -import { Button } from './ui/button'; -import { X } from 'lucide-react'; -interface ActiveQueryProps { +interface ModalProps { taskID: string | null; - onTaskCancelled?: () => void; } -const ActiveQuery: React.FC = ({ taskID, onTaskCancelled }) => { +const fetchTaskStatusData = async (user: User, taskID: string) => { + const accessToken = user?.access_token; + const response = await fetch(`/api/task_status?task_id=${taskID}`, { + method: 'GET', + headers: { + 'Authorization': `Bearer ${accessToken}`, // Pass the token + 'Content-Type': 'application/json', + }, + }); + if (!response.ok) { + throw new Error(`Failed to fetch task status: ${response.status}`); + } + + const data = + await response.json(); + return data; +}; + +type TaskStatus = string +// enum TaskStatus { +// QUEUED = 'queued', +// PROCESSING = 'processing', +// COMPLETED = 'completed', +// FAILED = 'failed', +// } + +// const taskStatusToProgress = (taskStatus: TaskStatus): number => { +// switch (taskStatus) { +// case TaskStatus.QUEUED: +// return 0.33; // Queued status +// case TaskStatus.PROCESSING: +// return 0.66; // Processing status +// case TaskStatus.COMPLETED: +// return 1.0; // Completed status +// default: +// throw new Error('Unknown task status: ' + status); +// } +// } + +// const getTaskStatus = (status: string): TaskStatus => { +// switch (status.toLowerCase()) { +// case 'queued': +// return TaskStatus.QUEUED; +// case 'processing': +// return TaskStatus.PROCESSING; +// case 'completed': +// return TaskStatus.COMPLETED; +// case 'failed': +// return TaskStatus.FAILED; +// default: +// throw new Error('Unknown task status: ' + status); +// } +// }; + +const ActiveQuery: React.FC = ({ + taskID +}) => { const [user, setUser] = useState(null); useEffect(() => { getUser().then(setUser); }, []); const [progressPercentage, setProgressPercentage] = useState(0); - const [taskStatus, setTaskStatus] = useState(TaskStatus.PENDING); + const [taskStatus, setTaskStatus] = useState("PENDING"); const [lastUpdateTime, setLastUpdateTime] = useState(new Date()); const [fetchStatusError, setFetchStatusError] = useState(null); const [alertDialogIsOpen, setAlertDialogIsOpen] = useState(false); - const [isCancelling, setIsCancelling] = useState(false); - const handleCancelTask = async () => { - if (!user || !taskID || isCancelling) return; - - setIsCancelling(true); - try { - const result = await cancelTask(user, taskID); - if (result.success) { - setTaskStatus(TaskStatus.REVOKED); - onTaskCancelled?.(); - } else { - setFetchStatusError(result.message); - setAlertDialogIsOpen(true); - } - } catch (error) { - setFetchStatusError(error instanceof Error ? error.message : 'Failed to cancel task'); - setAlertDialogIsOpen(true); - } finally { - setIsCancelling(false); - } - }; - - const pollTaskStatus = async (interval: NodeJS.Timeout) => { + const fetchTaskStatus = async (interval: NodeJS.Timeout) => { if (!user || !taskID) { return; } - + let data = null try { - const data = await fetchTaskStatus(user, taskID); - setLastUpdateTime(new Date()); - const status = data.status as TaskStatus; - setTaskStatus(status); - - if (status === TaskStatus.FAILURE || status === TaskStatus.REVOKED) { - clearInterval(interval); - setFetchStatusError('Task failed with status: ' + status); - setAlertDialogIsOpen(true); - return; - } - - if (status === TaskStatus.SUCCESS) { - clearInterval(interval); - setProgressPercentage(100); - return; - } - - // Only parse result for in-progress tasks - if (data.result) { - try { - const parsedResult: TaskResult = JSON.parse(data.result); - setProgressPercentage(parsedResult.progress * 100); - } catch { - // Result parsing failed, but task is still running - ignore - } - } - } catch (error) { + data = await fetchTaskStatusData(user, taskID); + } catch (error: any) { clearInterval(interval); - setTaskStatus(TaskStatus.FAILURE); - setAlertDialogIsOpen(true); + setTaskStatus("FAILURE") + setAlertDialogIsOpen(true) if (error instanceof Error) { - setFetchStatusError(error.message); + setFetchStatusError(error.message) } else { - setFetchStatusError('Failed to update task status: ' + String(error)); + setFetchStatusError('Failed to update task status: ' + error.toString()) } } + if (!data) { + clearInterval(interval); + return; + } + setLastUpdateTime(new Date()); + // const taskStatus = getTaskStatus(data.status); + const taskStatus = data.status; + setTaskStatus(taskStatus); + if (taskStatus === "FAILURE" || taskStatus === "REVOKED") { + clearInterval(interval); + throw new Error('Task failed. status: ' + taskStatus); + } + // const progress = taskStatusToProgress(taskStatus); + const parsedResult = JSON.parse(data.result) + setProgressPercentage(parsedResult.progress * 100); + if (taskStatus === "SUCCESS") { + clearInterval(interval); + return; + } }; + // fetch status periodically + // maybe move to ws one day useEffect(() => { - const interval = setInterval( - () => pollTaskStatus(interval), - POLLING_INTERVALS.TASK_STATUS_MS - ); + const interval = setInterval + (() => fetchTaskStatus(interval), 5000); // every 5 seconds return () => clearInterval(interval); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [taskID, user]); + }, [taskID]); if (!taskID) { return null; } - const isInProgress = taskStatus && - taskStatus !== TaskStatus.SUCCESS && - taskStatus !== TaskStatus.FAILURE && - taskStatus !== TaskStatus.REVOKED; - return ( <> -
+
- -
- {taskStatus && Task: {taskStatus}} - {isInProgress && } -
- + + {taskStatus && <>Task status: {taskStatus} } + + {taskStatus && taskStatus !== 'SUCCESS' && taskStatus !== 'FAILURE' && taskStatus !== 'REVOKED' && } Task ID: {taskID} @@ -130,22 +143,10 @@ const ActiveQuery: React.FC = ({ taskID, onTaskCancelled }) => Last updated: {lastUpdateTime.toLocaleString()}
- {isInProgress && ( - - )}
- ); + ) }; export default ActiveQuery; diff --git a/crawler/frontend/src/components/FilterPanel.tsx b/crawler/frontend/src/components/FilterPanel.tsx deleted file mode 100644 index 16b39e6..0000000 --- a/crawler/frontend/src/components/FilterPanel.tsx +++ /dev/null @@ -1,546 +0,0 @@ -import { zodResolver } from "@hookform/resolvers/zod"; -import { useState, useEffect } from "react"; -import { useForm } from "react-hook-form"; -import { z } from "zod"; -import { Button } from "./ui/button"; -import { Calendar29 } from "./ui/DatePicker"; -import { Form, FormControl, FormDescription, FormField, FormItem, FormLabel, FormMessage } from "./ui/form"; -import { Input } from "./ui/input"; -import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"; -import { Accordion, AccordionContent, AccordionItem, AccordionTrigger } from "./ui/accordion"; -import { Loader2, Filter, RefreshCw } from "lucide-react"; -import { ScrollArea } from "./ui/scroll-area"; - -export enum Metric { - qmprice = 'qmprice', - rooms = 'rooms', - qm = 'qm', - price = 'total_price', -} - -export enum ListingType { - RENT = 'RENT', - BUY = 'BUY' -} - -export enum FurnishType { - FURNISHED = 'furnished', - PART_FURNISHED = 'partFurnished', - UNFURNISHED = 'unfurnished', -} - -// Default filter values - exported so App.tsx can use them for initial load -export const DEFAULT_FILTER_VALUES = { - metric: Metric.qmprice, - listing_type: ListingType.RENT, - min_bedrooms: 1, - max_bedrooms: 3, - max_price: 3000, - min_price: 2000, - min_sqm: 50, - max_sqm: undefined, - min_price_per_sqm: undefined, - max_price_per_sqm: undefined, - last_seen_days: 28, - available_from: new Date(), - district: '', - furnish_types: [] as FurnishType[], -} as const; - -export interface ParameterValues { - metric: Metric - listing_type: ListingType - min_bedrooms?: number - max_bedrooms?: number - min_price?: number - max_price?: number - min_sqm?: number - max_sqm?: number - min_price_per_sqm?: number - max_price_per_sqm?: number - last_seen_days?: number - available_from?: Date - district: string - furnish_types?: FurnishType[] -} - -interface FilterPanelProps { - onSubmit: (action: 'fetch-data' | 'visualize', fromValues: ParameterValues) => void; - isLoading?: boolean; - listingCount?: number; -} - -const formSchema = z.object({ - metric: z.nativeEnum(Metric, { required_error: "Metric is required" }), - listing_type: z.nativeEnum(ListingType, { required_error: "Listing Type is required" }), - min_bedrooms: z.number().min(0).max(10).optional(), - max_bedrooms: z.number().min(0).max(10).optional(), - max_price: z.number().optional(), - min_price: z.number().min(0).optional(), - min_sqm: z.number().optional(), - max_sqm: z.number().optional(), - min_price_per_sqm: z.number().optional(), - max_price_per_sqm: z.number().optional(), - last_seen_days: z.number().min(0).optional(), - available_from: z.date(), - district: z.string(), - furnish_types: z.array(z.nativeEnum(FurnishType)).optional(), -}); - -type FormValues = z.infer; - -export function FilterPanel({ onSubmit, isLoading, listingCount }: FilterPanelProps) { - const [availableFromRawInput, setAvailableFromRawInput] = useState("now"); - const [selectedFurnishTypes, setSelectedFurnishTypes] = useState([]); - - const form = useForm({ - resolver: zodResolver(formSchema), - defaultValues: { - ...DEFAULT_FILTER_VALUES, - available_from: new Date(), // Fresh date on each render - }, - }); - - const handleFormSubmit = (action: 'fetch-data' | 'visualize') => { - return form.handleSubmit((values) => { - const params: ParameterValues = { - ...values, - furnish_types: selectedFurnishTypes.length > 0 ? selectedFurnishTypes : undefined, - }; - onSubmit(action, params); - })(); - }; - - const toggleFurnishType = (type: FurnishType) => { - setSelectedFurnishTypes(prev => - prev.includes(type) - ? prev.filter(t => t !== type) - : [...prev, type] - ); - }; - - // Count active filters - const countActiveFilters = (): number => { - const values = form.getValues(); - let count = 0; - if (values.min_bedrooms && values.min_bedrooms > 0) count++; - if (values.max_bedrooms && values.max_bedrooms < 10) count++; - if (values.min_price && values.min_price > 0) count++; - if (values.max_price) count++; - if (values.min_sqm && values.min_sqm > 0) count++; - if (values.max_sqm) count++; - if (values.min_price_per_sqm) count++; - if (values.max_price_per_sqm) count++; - if (values.district && values.district.length > 0) count++; - if (selectedFurnishTypes.length > 0) count++; - if (values.last_seen_days && values.last_seen_days < 365) count++; - return count; - }; - - const [activeFilterCount, setActiveFilterCount] = useState(0); - - useEffect(() => { - const subscription = form.watch(() => { - setActiveFilterCount(countActiveFilters()); - }); - return () => subscription.unsubscribe(); - }, [form, selectedFurnishTypes]); - - return ( -
- {/* Header */} -
-
- -

Filters

- {activeFilterCount > 0 && ( - - {activeFilterCount} - - )} -
- {listingCount !== undefined && ( -

- {listingCount.toLocaleString()} listings -

- )} -
- - {/* Filters */} - -
- - - {/* Visualization Options */} - - - Visualization - - -
- ( - - Color by - - - - )} - /> - ( - - Type - - - - )} - /> -
-
-
- - {/* Price & Size */} - - - Price & Size - - -
-
- ( - - Min Price (£) - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - )} - /> - ( - - Max Price (£) - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - )} - /> -
-
- ( - - Min Size (m²) - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - )} - /> - ( - - Max Size (m²) - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - )} - /> -
-
- ( - - Min £/m² - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - )} - /> - ( - - Max £/m² - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - )} - /> -
-
-
-
- - {/* Features */} - - - Features - - -
-
- ( - - Min Beds - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - )} - /> - ( - - Max Beds - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - )} - /> -
-
- Furnishing -
- {[ - { value: FurnishType.FURNISHED, label: 'Furnished' }, - { value: FurnishType.PART_FURNISHED, label: 'Part' }, - { value: FurnishType.UNFURNISHED, label: 'Unfurn.' }, - ].map((option) => ( - - ))} -
-
-
-
-
- - {/* Location */} - - - Location - - - ( - - District - - - - - Comma-separated list of districts - - - )} - /> - - - - {/* Availability */} - - - Availability - - -
- ( - - Available From - - - - - Rental listings only - - - )} - /> - ( - - Last Seen (days) - - field.onChange(e.target.value ? Number(e.target.value) : undefined)} - /> - - - Show listings seen in last N days - - - )} - /> -
-
-
-
-
- -
- - {/* Action Buttons */} -
- - -
-
- ); -} diff --git a/crawler/frontend/src/components/Header.tsx b/crawler/frontend/src/components/Header.tsx deleted file mode 100644 index b81b6d6..0000000 --- a/crawler/frontend/src/components/Header.tsx +++ /dev/null @@ -1,80 +0,0 @@ -import type { User } from 'oidc-client-ts'; -import { Button } from './ui/button'; -import { Separator } from './ui/separator'; -import { LogOut, Home, Filter } from 'lucide-react'; -import { logout } from '@/auth/authService'; -import { HealthIndicator } from './HealthIndicator'; -import { TaskIndicator } from './TaskIndicator'; - -interface HeaderProps { - user: User; - activeFilterCount?: number; - taskID?: string | null; - isLoading?: boolean; - onToggleFilters?: () => void; - showFilterToggle?: boolean; - onTaskCancelled?: () => void; -} - -export function Header({ - user, - activeFilterCount = 0, - taskID, - onToggleFilters, - showFilterToggle = false, - onTaskCancelled, -}: HeaderProps) { - return ( -
- {/* Logo / Brand */} -
- - Wrongmove -
- - - - {/* Health Indicator */} - - - {/* Task Indicator */} - - - {/* Filter Toggle (mobile) */} - {showFilterToggle && ( - - )} - - {/* Spacer */} -
- - {/* User Menu */} -
- - {user.profile.email} - - -
-
- ); -} diff --git a/crawler/frontend/src/components/HealthIndicator.tsx b/crawler/frontend/src/components/HealthIndicator.tsx deleted file mode 100644 index d511b02..0000000 --- a/crawler/frontend/src/components/HealthIndicator.tsx +++ /dev/null @@ -1,83 +0,0 @@ -import { useEffect, useState } from 'react'; -import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from './ui/tooltip'; -import { checkBackendHealth, type HealthStatus, type HealthCheckResult } from '@/services'; -import { Circle, Loader2 } from 'lucide-react'; - -interface HealthIndicatorProps { - /** How often to check health in milliseconds (default: 30000 = 30s) */ - interval?: number; -} - -export function HealthIndicator({ interval = 30000 }: HealthIndicatorProps) { - const [health, setHealth] = useState({ status: 'checking' }); - - useEffect(() => { - // Initial check - checkBackendHealth().then(setHealth); - - // Periodic checks - const intervalId = setInterval(() => { - checkBackendHealth().then(setHealth); - }, interval); - - return () => clearInterval(intervalId); - }, [interval]); - - const getStatusColor = (status: HealthStatus) => { - switch (status) { - case 'healthy': - return 'text-green-500'; - case 'unhealthy': - return 'text-red-500'; - case 'checking': - return 'text-muted-foreground'; - } - }; - - const getStatusLabel = (status: HealthStatus) => { - switch (status) { - case 'healthy': - return 'Connected'; - case 'unhealthy': - return 'Disconnected'; - case 'checking': - return 'Checking...'; - } - }; - - const getTooltipContent = () => { - if (health.status === 'checking') { - return 'Checking backend connection...'; - } - - if (health.status === 'healthy') { - return `Backend connected (${health.latencyMs}ms)`; - } - - return `Backend unavailable: ${health.error || 'Unknown error'}`; - }; - - return ( - - - -
- {health.status === 'checking' ? ( - - ) : ( - - )} - -
-
- -

{getTooltipContent()}

-
-
-
- ); -} diff --git a/crawler/frontend/src/components/Map.tsx b/crawler/frontend/src/components/Map.tsx index 0bd0669..0b1337c 100644 --- a/crawler/frontend/src/components/Map.tsx +++ b/crawler/frontend/src/components/Map.tsx @@ -1,241 +1,169 @@ +// @ts-nocheck import crossfilter from "crossfilter2"; import * as d3 from "d3"; import mapboxgl from "mapbox-gl"; -import 'mapbox-gl/dist/mapbox-gl.css'; -import { useEffect, useRef, useMemo, useCallback } from "react"; +import 'mapbox-gl/dist/mapbox-gl.css'; // this hides the map for some reason +import { useEffect, useRef } from "react"; import { renderToString } from 'react-dom/server'; import "../assets/Map.css"; import { Metric, type ParameterValues } from "./Parameters"; -import { PropertyCard } from "./PropertyCard"; +import { Button } from "./ui/button"; import { ScrollArea } from "./ui/scroll-area"; -import type { GeoJSONFeatureCollection, PropertyFeature, PropertyProperties } from "@/types"; -import { MAP_CONFIG, HEATMAP_CONFIG, PERCENTILE_CONFIG } from "@/constants"; -import { getColorSchemeForMetric, getMetricInterpretation } from "@/constants/colorSchemes"; -import { clone, percentile, calculateColorStops } from "@/utils/mapUtils"; +import { Separator } from "./ui/separator"; -// Type declaration for the external HexgridHeatmap library -declare class HexgridHeatmap { - _tree: { - search: (bounds: { minX: number; maxX: number; minY: number; maxY: number }) => PropertyWithCoords[]; - }; - constructor(map: mapboxgl.Map, id: string, beforeLayer: string); - setIntensity(value: number): void; - setSpread(value: number): void; - setCellDensity(value: number): void; - setPropertyName(name: string): void; - setData(data: GeoJSONFeatureCollection): void; - setColorStops(stops: [number, string][]): void; - update(): void; -} - -interface PropertyWithCoords { - properties: PropertyProperties; -} - -interface CrossfilterRecord extends PropertyProperties { - index: number; -} - -interface MapProps { - listingData: GeoJSONFeatureCollection; - queryParameters: ParameterValues | null; - onPropertyClick?: (property: PropertyProperties, coordinates: [number, number]) => void; -} - -interface FilterState { - city: string; - country: string | null; - mode: string; - count?: number; -} - -export function Map(props: MapProps) { +export function Map( + props: { + listingData: any; + queryParameters: ParameterValues | null; + } +) { const data = props.listingData; + var crossData = data.features.map(function (d, i) { + //clone properties + var props = clone(d['properties']); + props['index'] = i; + return props; + }); + const cf = crossfilter(crossData); + const qmDim = cf.dimension(function (d) { return d.qm; }); + const cityDim = cf.dimension(function (d) { return d.city; }); + const countryDim = cf.dimension(function (d) { return d.country; }); + const rentDim = cf.dimension(function (d) { return d.total_price; }); + const roomsDim = cf.dimension(function (d) { return d.rooms; }); + const urlDim = cf.dimension(function (d) { return d.url; }); + const indexDim = cf.dimension(function (d) { return d.index; }); + let heatmap = null; - const mapRef = useRef(null); - const mapContainerRef = useRef(null); - const heatmapRef = useRef(null); - const updateTimeoutRef = useRef(null); - const isMapLoadedRef = useRef(false); - const lastDataLengthRef = useRef(0); - - const filter: FilterState = { city: 'London', country: null, mode: Metric.qmprice }; + // rivet + var filter = { city: 'London', country: null, mode: Metric.qmprice }; + // filter['countries'] = Array.from(new Set(data.features.map(function (d) { return d['properties']['country'] }))); if (props.queryParameters) { - filter.mode = props.queryParameters.metric; + filter['mode'] = props.queryParameters.metric; + } + // rivets.bind(document.getElementById('overlay'), { filter: filter }); + const mapRef = useRef(mapboxgl.Map) + const mapContainerRef = useRef('map-container') + useEffect(() => { + mapboxgl.accessToken = 'pk.eyJ1IjoiZGktdG8iLCJhIjoiY2o0bnBoYXcxMW1mNzJ3bDhmc2xiNWttaiJ9.ZccatVk_4shzoAsEUXXecA'; + mapRef.current = new mapboxgl.Map({ + container: mapContainerRef.current, + style: 'mapbox://styles/mapbox/light-v9', + center: [13.38032, 49.994210], + zoom: 5 + }); + mapRef.current.on('load', function () { + update() + }) + mapRef.current.on('click', function (e) { + openListingsDialog(e.lngLat.lng, e.lngLat.lat); + }) + return () => { + mapRef.current.remove() + } + }, [data]) + + + function clone(d) { + return JSON.parse(JSON.stringify(d)); } - // Get appropriate color scheme based on metric - const colorScheme = useMemo(() => { - return getColorSchemeForMetric(filter.mode); - }, [filter.mode]); + function percentile(arr, p) { + if (arr.length === 0) return 0; + if (typeof p !== 'number') throw new TypeError('p must be a number'); + if (p <= 0) return arr[0]; + if (p >= 1) return arr[arr.length - 1]; - const metricInfo = useMemo(() => { - return getMetricInterpretation(filter.mode); - }, [filter.mode]); + var index = arr.length * p, + lower = Math.floor(index), + upper = lower + 1, + weight = index % 1; - // Calculate average price per sqm for property cards - const avgPricePerSqm = useMemo(() => { - const validPrices = data.features - .map((f) => f.properties.qmprice) - .filter((p): p is number => typeof p === 'number' && p > 0); - return validPrices.length > 0 - ? validPrices.reduce((a, b) => a + b, 0) / validPrices.length - : 0; - }, [data]); + if (upper >= arr.length) return arr[lower]; + return arr[lower] * (1 - weight) + arr[upper] * weight; + } - // Build crossfilter data - const buildCrossfilterData = useCallback(() => { - return data.features.map(function (d: PropertyFeature, i: number) { - const propsCopy = clone(d.properties) as CrossfilterRecord; - propsCopy.index = i; - return propsCopy; - }); - }, [data]); - - const updateHeatmap = useCallback(() => { - if (!mapRef.current || !isMapLoadedRef.current) return; - - const crossData = buildCrossfilterData(); - const cf = crossfilter(crossData); - const qmDim = cf.dimension(function (d: CrossfilterRecord) { return d.qm; }); - const cityDim = cf.dimension(function (d: CrossfilterRecord) { return d.city; }); - const countryDim = cf.dimension(function (d: CrossfilterRecord) { return d.country; }); - const indexDim = cf.dimension(function (d: CrossfilterRecord) { return d.index; }); - - // Create heatmap if it doesn't exist - if (!heatmapRef.current) { - heatmapRef.current = new HexgridHeatmap(mapRef.current, "hexgrid-heatmap", "waterway-label"); - heatmapRef.current.setIntensity(HEATMAP_CONFIG.INTENSITY); - heatmapRef.current.setSpread(HEATMAP_CONFIG.SPREAD); - heatmapRef.current.setCellDensity(HEATMAP_CONFIG.CELL_DENSITY); - } - - const heatmap = heatmapRef.current; + function update() { + // init heatmap + heatmap = new HexgridHeatmap(mapRef.current, "hexgrid-heatmap", "waterway-label"); + heatmap.setIntensity(9); // dunno yet + heatmap.setSpread(0.05); // dunno yet + heatmap.setCellDensity(0.5); // small value == bigger hexagons heatmap.setPropertyName(filter.mode); if (filter.mode === Metric.qmprice) { - qmDim.filter((d) => (d as number) > 0); + // if we visualize sqm based data, remove properties where we have no data + qmDim.filter(function (d) { return d > 0; }); } + + // set filter if (filter.city) { cityDim.filterExact(filter.city); } else if (filter.country) { countryDim.filterExact(filter.country); + } else { + alert('nothing loadable'); } + filter.count = cityDim.top(Infinity).length; - const subset: GeoJSONFeatureCollection = { type: "FeatureCollection", features: [] }; - indexDim.top(Infinity).forEach(function (i: CrossfilterRecord) { + var subset = { "type": "FeatureCollection", "features": [] }; + indexDim.top(Infinity).forEach(function (i) { subset.features.push(data.features[i.index]); }); - // Update heatmap data + loadData(heatmap, subset); + } + + function loadData(heatmap, subset) { heatmap.setData(subset); - let values = subset.features.map(function (d: PropertyFeature) { - return d.properties[filter.mode as keyof PropertyProperties] as number; + var values = subset.features.map(function (d) { return d['properties'][filter.mode] }); + values = values.sort(function (a, b) { return a - b; }); + + // setting the color stops, min is at 5th percentile, max at 95percentile + var min = values[Math.round(values.length * 0.05)]; + var max = values[Math.round(values.length * 0.95)]; + var colorStopsPerc = [ + [0, "rgba(0,185,243,0)"], + [25, "rgba(0,185,243,0.24)"], + [60, "rgba(255,223,0,0.3)"], + [100, "rgba(255,105,0,0.3)"], + ]; + makeLegend(colorStopsPerc, min, max); + var colorStopsValue = colorStopsPerc.map(function (d) { + return [min + d[0] * (max - min) / 100, d[1]]; }); - values = values.sort(function (a: number, b: number) { return a - b; }); - - const minIndex = Math.round(values.length * PERCENTILE_CONFIG.MIN_BOUND); - const maxIndex = Math.round(values.length * PERCENTILE_CONFIG.MAX_BOUND); - const min = values[minIndex]; - const max = values[maxIndex]; - - makeLegend(colorScheme, min, max); - const colorStopsValue = calculateColorStops(colorScheme, min, max); heatmap.setColorStops(colorStopsValue); heatmap.update(); - // Fit bounds only on first load or significant data change - if (lastDataLengthRef.current === 0 && subset.features.length > 0) { - const longitudes = subset.features.map(function (d: PropertyFeature) { return d.geometry.coordinates[0]; }).sort(function (a: number, b: number) { return a - b; }); - const latitudes = subset.features.map(function (d: PropertyFeature) { return d.geometry.coordinates[1]; }).sort(function (a: number, b: number) { return a - b; }); - const minlng = percentile(longitudes, PERCENTILE_CONFIG.BOUNDS_CLIP_MIN); - const maxlng = percentile(longitudes, PERCENTILE_CONFIG.BOUNDS_CLIP_MAX); - const minlat = percentile(latitudes, PERCENTILE_CONFIG.BOUNDS_CLIP_MIN); - const maxlat = percentile(latitudes, PERCENTILE_CONFIG.BOUNDS_CLIP_MAX); + //get bounding box and zoom to that area + // we use a 1% percentile since some data can be corrupt + var longitudes = subset.features.map(function (d) { return d.geometry.coordinates[0]; }).sort(function (a, b) { return a - b; }); + var latitudes = subset.features.map(function (d) { return d.geometry.coordinates[1]; }).sort(function (a, b) { return a - b; }); + var minlng = percentile(longitudes, 0.01); + var maxlng = percentile(longitudes, 0.99); + var minlat = percentile(latitudes, 0.01); + var maxlat = percentile(latitudes, 0.99); + mapRef.current.fitBounds([ + [minlng, minlat], + [maxlng, maxlat] + ]); + } - mapRef.current?.fitBounds([ - [minlng, minlat], - [maxlng, maxlat] - ]); - } - - lastDataLengthRef.current = subset.features.length; - }, [data, filter.mode, filter.city, filter.country, colorScheme, buildCrossfilterData]); - - // Initialize map - useEffect(() => { - if (!mapContainerRef.current) return; - - mapboxgl.accessToken = MAP_CONFIG.MAPBOX_TOKEN; - mapRef.current = new mapboxgl.Map({ - container: mapContainerRef.current, - style: MAP_CONFIG.STYLE, - center: MAP_CONFIG.DEFAULT_CENTER, - zoom: MAP_CONFIG.DEFAULT_ZOOM - }); - mapRef.current.on('load', function () { - isMapLoadedRef.current = true; - lastDataLengthRef.current = 0; - updateHeatmap(); - }); - mapRef.current.on('click', function (e: mapboxgl.MapMouseEvent) { - openListingsDialog(e.lngLat.lng, e.lngLat.lat); - }); - return () => { - if (updateTimeoutRef.current) { - clearTimeout(updateTimeoutRef.current); - } - heatmapRef.current = null; - isMapLoadedRef.current = false; - mapRef.current?.remove(); - }; - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - // Debounced update effect - only update after 200ms of no changes - useEffect(() => { - if (!isMapLoadedRef.current) return; - - // Clear any pending update - if (updateTimeoutRef.current) { - clearTimeout(updateTimeoutRef.current); - } - - // Schedule new update after 200ms of no changes - updateTimeoutRef.current = window.setTimeout(() => { - updateHeatmap(); - }, 200); - - return () => { - if (updateTimeoutRef.current) { - clearTimeout(updateTimeoutRef.current); - } - }; - }, [data, updateHeatmap]); - - function makeLegend(colorstops: [number, string][], minValue: number, maxValue: number) { - const svg_height = 280, svg_width = 80; + function makeLegend(colorstops, minValue, maxValue) { + /** + * colorstops: [[0, 'green'], [100, 'red']] + * @type {number} + */ + var svg_height = 300, svg_width = 70; + // clear svg before starting d3.select('#svg').selectAll('*').remove(); + // create a new SVG element const svg = d3.select('#svg'); - svg + var defs = svg .attr('height', svg_height) .attr('width', svg_width); - // Add metric name at top - svg.append("text") - .attr("x", svg_width / 2) - .attr("y", 12) - .attr("text-anchor", "middle") - .attr("font-size", "11px") - .attr("font-weight", "600") - .attr("fill", "#374151") - .text(metricInfo.name); - - const gradientTop = 30; - const gradientHeight = svg_height - 70; - - const linearGradient = svg.append("defs") + var linearGradient = svg.append("defs") .append("linearGradient") .attr("id", "linear-gradient"); @@ -246,104 +174,150 @@ export function Map(props: MapProps) { .attr("y2", "0%"); svg.append("rect") - .attr("x", 0) - .attr("y", gradientTop) - .attr("width", svg_width * 0.35) - .attr("height", gradientHeight) + .attr("width", svg_width * 0.4) + .attr("height", svg_height) .attr('rx', 4) .style("fill", "url(#linear-gradient)"); - colorstops.forEach(function (d: [number, string]) { + colorstops.forEach(function (d) { linearGradient.append("stop") .attr("offset", d[0] + "%") .attr("stop-color", d[1]); }); - const xScale = d3.scaleLinear().range([gradientHeight - 10, 0]).domain([minValue, maxValue]); - const xAxis = d3.axisRight(xScale).ticks(5).tickFormat((d) => { - const num = d as number; - if (num >= 1000) { - return `${(num / 1000).toFixed(1)}k`; - } - return String(Math.round(num)); - }); + + var xScale = d3.scaleLinear().range([svg_height - 20, 0]).domain([minValue, maxValue]); + var xAxis = d3.axisRight(xScale).ticks(5); svg.append("g") .attr("class", "axis") - .attr("transform", "translate(" + (svg_width * 0.38) + "," + (gradientTop + 5) + ")") - .call(xAxis) - .selectAll("text") - .attr("font-size", "10px"); - - // Add interpretation labels at bottom - svg.append("text") - .attr("x", svg_width / 2) - .attr("y", svg_height - 25) - .attr("text-anchor", "middle") - .attr("font-size", "9px") - .attr("fill", "#22c55e") - .text(metricInfo.low); - - svg.append("text") - .attr("x", svg_width / 2) - .attr("y", svg_height - 10) - .attr("text-anchor", "middle") - .attr("font-size", "9px") - .attr("fill", "#ef4444") - .text(metricInfo.high); + .attr("transform", "translate(" + svg_width / 2 + "," + (10) + ")") + .call(xAxis); } function openListingsDialog(longitude: number, latitude: number) { - if (!heatmapRef.current || !mapRef.current) return; - - const searchBuffer = HEATMAP_CONFIG.SEARCH_BUFFER; - const properties = heatmapRef.current._tree.search({ + const searchBuffer = 0.001 // ~100m + const properties = heatmap._tree.search({ minX: longitude - searchBuffer, maxX: longitude + searchBuffer, minY: latitude - searchBuffer, maxY: latitude + searchBuffer - }); + }) if (properties.length > 0) { const listingDialogPopup = getListingDialog(properties); new mapboxgl.Popup() .setLngLat([longitude, latitude]) .setHTML(renderToString(listingDialogPopup)) - .setMaxWidth("450px") + .setMaxWidth("500px") .addTo(mapRef.current); } } - function getListingDialog(properties: PropertyWithCoords[]) { - return ( - -
-
- {properties.length} properties in this area -
-
- {properties.map((property) => ( - - ))} -
+ function getListingDialog(properties) { + let listingComponents = []; + for (let property of properties) { + listingComponents.push(getPropertyComponent(property)); + } + return +
+ +
+ Showing {properties.length} properties
- - ); + {listingComponents.map((item) => { + const scrollDiv =
+ {item} + +
; + return scrollDiv + })} +
+
; } - return ( -
-
-
- + function getPropertyComponent(property) { + const priceHistoryHTMLs = property.properties.price_history.map((d) => { + return
  • ${d.last_seen.split('T')[0]}: £${d.price}
  • ; + }); + + let priceHistoryHTML = <>; + if (priceHistoryHTMLs.length > 1) { + priceHistoryHTML = +
    + Price history: +
      + ${priceHistoryHTMLs.join('')} +
    +
    +
    + + } + const lastSeenStr = property.properties.last_seen.split('T')[0]; + const lastSeenDays = Math.round((new Date() - new Date(lastSeenStr)) / (1000 * 60 * 60 * 24)); + return
    +
    + + + +
    +
    + Available from: +
    +
    + {property.properties.available_from} +
    +
    + Price: +
    +
    + £{property.properties.total_price} +
    + {priceHistoryHTML} +
    + Rooms: +
    +
    + + {property.properties.rooms} +
    +
    + Area: +
    +
    + {property.properties.qm} m² +
    +
    + Price per area: +
    +
    + £{property.properties.qmprice}/m² +
    +
    + Last seen: +
    +
    + {lastSeenDays} days ago +
    +
    + Agency: +
    +
    + {property.properties.agency} +
    +
    +
    - ); -} + } -// Re-export types for backwards compatibility -export { Metric, type ParameterValues } from "./Parameters"; + return <> +
    + +
    + + +
    + +} diff --git a/crawler/frontend/src/components/PropertyCard.tsx b/crawler/frontend/src/components/PropertyCard.tsx deleted file mode 100644 index 29ac5a0..0000000 --- a/crawler/frontend/src/components/PropertyCard.tsx +++ /dev/null @@ -1,188 +0,0 @@ -import { ExternalLink, Bed, Maximize2, PoundSterling, Clock, Building } from 'lucide-react'; -import { Button } from './ui/button'; -import type { PropertyProperties } from '@/types'; - -interface PropertyCardProps { - property: PropertyProperties; - variant?: 'compact' | 'full'; - isHighlighted?: boolean; - avgPricePerSqm?: number; - onClick?: () => void; -} - -export function PropertyCard({ - property, - variant = 'compact', - isHighlighted = false, - avgPricePerSqm, - onClick, -}: PropertyCardProps) { - const lastSeenDate = property.last_seen.split('T')[0]; - const lastSeenDays = Math.round((Date.now() - new Date(lastSeenDate).getTime()) / (1000 * 60 * 60 * 24)); - - // Determine if this is a good deal - const isGoodDeal = avgPricePerSqm && property.qmprice > 0 && property.qmprice < avgPricePerSqm * 0.9; - const isExpensive = avgPricePerSqm && property.qmprice > avgPricePerSqm * 1.1; - - const priceIndicator = isGoodDeal - ? { color: 'text-green-600 bg-green-50', label: 'Good deal' } - : isExpensive - ? { color: 'text-red-600 bg-red-50', label: 'Above avg' } - : null; - - const handleClick = () => { - window.open(property.url, '_blank', 'noopener,noreferrer'); - onClick?.(); - }; - - if (variant === 'compact') { - return ( -
    - {/* Thumbnail */} -
    - {property.photo_thumbnail && ( - Property - )} -
    - - {/* Details */} -
    -
    -
    - £{property.total_price.toLocaleString()} - /mo -
    - {priceIndicator && ( - - {priceIndicator.label} - - )} -
    - -
    - - - {property.rooms} - - - - {property.qm} m² - - - £{property.qmprice}/m² - -
    - -
    - - - {lastSeenDays}d ago - - {property.agency} -
    -
    -
    - ); - } - - // Full variant (for popup/detail view) - return ( -
    - {/* Header with image and price */} -
    - - {property.photo_thumbnail && ( - Property - )} - - -
    -
    -
    -
    - £{property.total_price.toLocaleString()} - /mo -
    - {priceIndicator && ( - - {priceIndicator.label} - - )} -
    -
    -
    -
    - - {/* Stats grid */} -
    -
    - - {property.rooms} bedrooms -
    -
    - - {property.qm} -
    -
    - - £{property.qmprice}/m² -
    -
    - - Available {property.available_from} -
    -
    - - {/* Agency and last seen */} -
    - - {property.agency} - - Seen {lastSeenDays} days ago -
    - - {/* Price history */} - {property.price_history.length > 1 && ( -
    -
    Price history
    -
    - {property.price_history.slice(0, 5).map((entry) => ( -
    - {entry.last_seen.split('T')[0]} - £{entry.price.toLocaleString()} -
    - ))} -
    -
    - )} - - {/* Actions */} - -
    - ); -} diff --git a/crawler/frontend/src/components/TaskIndicator.tsx b/crawler/frontend/src/components/TaskIndicator.tsx deleted file mode 100644 index 403692c..0000000 --- a/crawler/frontend/src/components/TaskIndicator.tsx +++ /dev/null @@ -1,166 +0,0 @@ -import { getUser } from '@/auth/authService'; -import { POLLING_INTERVALS } from '@/constants'; -import { fetchTaskStatus, cancelTask } from '@/services'; -import { TaskStatus, type TaskResult } from '@/types'; -import type { User } from 'oidc-client-ts'; -import { useEffect, useState } from 'react'; -import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from './ui/tooltip'; -import { Button } from './ui/button'; -import { Loader2, CheckCircle2, XCircle, X } from 'lucide-react'; - -interface TaskIndicatorProps { - taskID: string | null; - onTaskCancelled?: () => void; -} - -export function TaskIndicator({ taskID, onTaskCancelled }: TaskIndicatorProps) { - const [user, setUser] = useState(null); - const [progressPercentage, setProgressPercentage] = useState(0); - const [taskStatus, setTaskStatus] = useState(null); - const [isCancelling, setIsCancelling] = useState(false); - - useEffect(() => { - getUser().then(setUser); - }, []); - - useEffect(() => { - if (!user || !taskID) { - setTaskStatus(null); - return; - } - - // Reset state for new task - setTaskStatus(TaskStatus.PENDING); - setProgressPercentage(0); - - const pollTaskStatus = async () => { - try { - const data = await fetchTaskStatus(user, taskID); - const status = data.status as TaskStatus; - setTaskStatus(status); - - if (status === TaskStatus.SUCCESS) { - setProgressPercentage(100); - return true; // Stop polling - } - - if (status === TaskStatus.FAILURE || status === TaskStatus.REVOKED) { - return true; // Stop polling - } - - // Parse progress for in-progress tasks - if (data.result) { - try { - const parsedResult: TaskResult = JSON.parse(data.result); - setProgressPercentage(parsedResult.progress * 100); - } catch { - // Ignore parsing errors - } - } - return false; // Continue polling - } catch { - setTaskStatus(TaskStatus.FAILURE); - return true; // Stop polling on error - } - }; - - // Initial poll - pollTaskStatus(); - - const interval = setInterval(async () => { - const shouldStop = await pollTaskStatus(); - if (shouldStop) { - clearInterval(interval); - } - }, POLLING_INTERVALS.TASK_STATUS_MS); - - return () => clearInterval(interval); - }, [taskID, user]); - - const handleCancel = async () => { - if (!user || !taskID || isCancelling) return; - - setIsCancelling(true); - try { - const result = await cancelTask(user, taskID); - if (result.success) { - setTaskStatus(TaskStatus.REVOKED); - onTaskCancelled?.(); - } - } catch { - // Ignore cancel errors - } finally { - setIsCancelling(false); - } - }; - - if (!taskID || !taskStatus) { - return null; - } - - const isInProgress = taskStatus !== TaskStatus.SUCCESS && - taskStatus !== TaskStatus.FAILURE && - taskStatus !== TaskStatus.REVOKED; - - const getStatusIcon = () => { - if (isInProgress) { - return ; - } - if (taskStatus === TaskStatus.SUCCESS) { - return ; - } - return ; - }; - - const getTooltipContent = () => { - if (isInProgress) { - return `Task running: ${Math.round(progressPercentage)}%`; - } - if (taskStatus === TaskStatus.SUCCESS) { - return 'Task completed successfully'; - } - if (taskStatus === TaskStatus.REVOKED) { - return 'Task was cancelled'; - } - return 'Task failed'; - }; - - return ( - -
    - - -
    - {getStatusIcon()} - - {isInProgress ? `${Math.round(progressPercentage)}%` : taskStatus} - -
    -
    - -

    {getTooltipContent()}

    -

    ID: {taskID.slice(0, 8)}...

    -
    -
    - {isInProgress && ( - - - - - -

    Cancel task

    -
    -
    - )} -
    -
    - ); -} diff --git a/crawler/frontend/src/services/healthService.ts b/crawler/frontend/src/services/healthService.ts deleted file mode 100644 index b5fb382..0000000 --- a/crawler/frontend/src/services/healthService.ts +++ /dev/null @@ -1,74 +0,0 @@ -// Health check service for backend connectivity - -export type HealthStatus = 'checking' | 'healthy' | 'unhealthy'; - -export interface HealthCheckResult { - status: HealthStatus; - latencyMs?: number; - error?: string; -} - -/** - * Check backend health by calling the /api/status endpoint - */ -export async function checkBackendHealth(): Promise { - const startTime = performance.now(); - - try { - const response = await fetch('/api/status', { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - }, - // Short timeout for health checks - signal: AbortSignal.timeout(5000), - }); - - const latencyMs = Math.round(performance.now() - startTime); - - if (!response.ok) { - return { - status: 'unhealthy', - latencyMs, - error: `HTTP ${response.status}`, - }; - } - - const data = await response.json(); - if (data.status === 'OK') { - return { - status: 'healthy', - latencyMs, - }; - } - - return { - status: 'unhealthy', - latencyMs, - error: 'Unexpected response', - }; - } catch (error) { - const latencyMs = Math.round(performance.now() - startTime); - - if (error instanceof Error) { - if (error.name === 'TimeoutError' || error.name === 'AbortError') { - return { - status: 'unhealthy', - latencyMs, - error: 'Request timeout', - }; - } - return { - status: 'unhealthy', - latencyMs, - error: error.message, - }; - } - - return { - status: 'unhealthy', - latencyMs, - error: 'Connection failed', - }; - } -} diff --git a/crawler/frontend/src/services/taskService.ts b/crawler/frontend/src/services/taskService.ts deleted file mode 100644 index cd94bf3..0000000 --- a/crawler/frontend/src/services/taskService.ts +++ /dev/null @@ -1,43 +0,0 @@ -// Task service for fetching task status - -import type { User } from 'oidc-client-ts'; -import type { TaskStatusResponse } from '@/types'; -import { apiRequest } from './apiClient'; -import { API_ENDPOINTS } from '@/constants'; - -export interface CancelTaskResponse { - success: boolean; - message: string; -} - -/** - * Fetch all active tasks for the current user - */ -export async function fetchTasksForUser(user: User): Promise { - return apiRequest(user, API_ENDPOINTS.TASKS_FOR_USER); -} - -/** - * Fetch the status of a specific task - */ -export async function fetchTaskStatus( - user: User, - taskId: string -): Promise { - return apiRequest(user, API_ENDPOINTS.TASK_STATUS, { - params: { task_id: taskId }, - }); -} - -/** - * Cancel a running task - */ -export async function cancelTask( - user: User, - taskId: string -): Promise { - return apiRequest(user, API_ENDPOINTS.CANCEL_TASK, { - method: 'POST', - params: { task_id: taskId }, - }); -} diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py index 33afe45..3738d32 100644 --- a/crawler/tasks/listing_tasks.py +++ b/crawler/tasks/listing_tasks.py @@ -1,50 +1,35 @@ import asyncio +import importlib import itertools import logging from typing import Any from celery import Task -from celery.schedules import crontab from celery_app import app -from config.schedule_config import SchedulesConfig from listing_processor import ListingProcessor -from models.listing import Listing, QueryParameters +from models.listing import Listing, ListingType, QueryParameters from rec.districts import get_districts from rec.query import listing_query from repositories.listing_repository import ListingRepository from database import engine -from services import image_fetcher, floorplan_detector -from utils.redis_lock import redis_lock + +dump_images_module = importlib.import_module("3_dump_images") +detect_floorplan_module = importlib.import_module("4_detect_floorplan") logger = logging.getLogger("uvicorn.error") -SCRAPE_LOCK_NAME = "scrape_listings" - @app.task(bind=True, pydantic=True) def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]: - with redis_lock(SCRAPE_LOCK_NAME) as acquired: - if not acquired: - logger.warning("Another scrape job is already running, skipping this execution") - self.update_state(state="SKIPPED", meta={"reason": "Another scrape job is running"}) - return {"status": "skipped", "reason": "another_job_running"} - - logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}") - parsed_parameters = QueryParameters.model_validate_json(parameters_json) - self.update_state(state="Starting...", meta={"progress": 0}) - asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters)) - return {"progress": 0} + parsed_parameters = QueryParameters.model_validate_json(parameters_json) + self.update_state(state="Starting...", meta={"progress": 0}) + asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters)) + return {"progress": 0} async def async_dump_listings_task(parameters_json: str) -> dict[str, Any]: - with redis_lock(SCRAPE_LOCK_NAME) as acquired: - if not acquired: - logger.warning("Another scrape job is already running, skipping this execution") - return {"status": "skipped", "reason": "another_job_running"} - - logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}") - parsed_parameters = QueryParameters.model_validate_json(parameters_json) - await dump_listings_full(task=Task(), parameters=parsed_parameters) - return {"progress": 0} + parsed_parameters = QueryParameters.model_validate_json(parameters_json) + await dump_listings_full(task=Task(), parameters=parsed_parameters) + return {"progress": 0} async def dump_listings_full( @@ -98,27 +83,19 @@ async def dump_listings_and_monitor( @app.on_after_finalize.connect def setup_periodic_tasks(sender, **kwargs): - """Register periodic tasks from environment configuration.""" - try: - config = SchedulesConfig.from_env() - except ValueError as e: - logger.error(f"Failed to load schedule configuration: {e}") - return - - for schedule in config.get_enabled_schedules(): - logger.info( - f"Registering periodic task: {schedule.name} at {schedule.hour}:{schedule.minute}" - ) - - sender.add_periodic_task( - crontab( - minute=schedule.minute, - hour=schedule.hour, - day_of_week=schedule.day_of_week, - ), - dump_listings_task.s(schedule.to_query_parameters().model_dump_json()), - name=schedule.name, - ) + sender.add_periodic_task( + 3600 * 24, # Daily updates + dump_listings_task.s( + QueryParameters( + listing_type=ListingType.RENT, + min_bedrooms=2, + max_bedrooms=3, + min_price=2000, + max_price=4000, + ).model_dump_json() + ), + name="Daily dump of interesting rent listings", + ) async def get_ids_to_process(