From c7ac448f15f08cab0b4e9d0cd04fbcc6f1a7539c Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 1 Feb 2026 17:28:37 +0000 Subject: [PATCH] Add configurable scheduling, UI health/task indicators, and auto-load map with default filters --- crawler/.env.sample | 9 + crawler/api/app.py | 208 +++++-- crawler/config/__init__.py | 4 + crawler/config/schedule_config.py | 122 ++++ crawler/docker-compose.yml | 105 ++++ crawler/frontend/src/App.css | 48 +- crawler/frontend/src/App.tsx | 388 ++++++++----- crawler/frontend/src/assets/Map.css | 97 ++-- .../frontend/src/components/ActiveQuery.tsx | 203 ++++--- .../frontend/src/components/FilterPanel.tsx | 546 ++++++++++++++++++ crawler/frontend/src/components/Header.tsx | 80 +++ .../src/components/HealthIndicator.tsx | 83 +++ crawler/frontend/src/components/Map.tsx | 506 ++++++++-------- .../frontend/src/components/PropertyCard.tsx | 188 ++++++ .../frontend/src/components/TaskIndicator.tsx | 166 ++++++ .../frontend/src/services/healthService.ts | 74 +++ crawler/frontend/src/services/taskService.ts | 43 ++ crawler/tasks/listing_tasks.py | 73 ++- 18 files changed, 2287 insertions(+), 656 deletions(-) create mode 100644 crawler/config/__init__.py create mode 100644 crawler/config/schedule_config.py create mode 100644 crawler/docker-compose.yml create mode 100644 crawler/frontend/src/components/FilterPanel.tsx create mode 100644 crawler/frontend/src/components/Header.tsx create mode 100644 crawler/frontend/src/components/HealthIndicator.tsx create mode 100644 crawler/frontend/src/components/PropertyCard.tsx create mode 100644 crawler/frontend/src/components/TaskIndicator.tsx create mode 100644 crawler/frontend/src/services/healthService.ts create mode 100644 crawler/frontend/src/services/taskService.ts diff --git a/crawler/.env.sample b/crawler/.env.sample index 567d519..1e7fe00 100644 --- a/crawler/.env.sample +++ b/crawler/.env.sample @@ -6,3 +6,12 @@ export ROUTING_API_KEY="" # fetch from https://console.cloud.google.c export DB_CONNECTION_STRING="sqlite:///data/wrongmove.db" # by default use SQLite locally export CELERY_BROKER_URL="redis://localhost:6379/0" # processing background tasks export CELERY_RESULT_BACKEND="redis://localhost:6379/1" + +# Periodic scraping schedules (JSON array) +# Each schedule has: name, enabled, hour, minute, day_of_week, listing_type, min/max_bedrooms, min/max_price, district_names, furnish_types +# Cron fields: minute (0-59), hour (0-23), day_of_week (0-6, 0=Sunday) +# Example: +# SCRAPE_SCHEDULES='[{"name":"Daily RENT","listing_type":"RENT","hour":"2","min_bedrooms":2,"max_bedrooms":3,"min_price":2000,"max_price":4000}]' +# Multiple schedules: +# SCRAPE_SCHEDULES='[{"name":"RENT 2am","listing_type":"RENT","hour":"2"},{"name":"BUY 4am","listing_type":"BUY","hour":"4"}]' +SCRAPE_SCHEDULES= diff --git a/crawler/api/app.py b/crawler/api/app.py index 55d172a..c894b19 100644 --- a/crawler/api/app.py +++ b/crawler/api/app.py @@ -1,30 +1,25 @@ +"""FastAPI application for the Real Estate Crawler API.""" from datetime import datetime, timedelta import json import logging import logging.config -from typing import Annotated +from typing import Annotated, Optional from api.auth import get_current_user from api.config import DEV_TIER_ORIGINS, PROD_TIER_ORIGINS from dotenv import load_dotenv from fastapi import Depends, FastAPI, Query +from fastapi.responses import StreamingResponse from api.auth import User -from models.listing import QueryParameters +from models.listing import QueryParameters, ListingType, FurnishType from notifications import send_notification -from rec import districts -from redis_repository import RedisRepository from repositories.listing_repository import ListingRepository from database import engine from fastapi.middleware.cors import CORSMiddleware +from ui_exporter import convert_to_geojson_feature, convert_row_to_geojson -from tasks import listing_tasks -from ui_exporter import export_immoweb -from alembic import command -from alembic.config import Config -from contextlib import asynccontextmanager -from celery.exceptions import TaskRevokedError -from celery_app import app as celery_app +from services import listing_service, export_service, district_service, task_service from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor -from api.metrics import metrics_app # Import the Prometheus ASGI app +from api.metrics import metrics_app from opentelemetry.metrics import get_meter @@ -32,17 +27,35 @@ load_dotenv() logger = logging.getLogger("uvicorn") -# @asynccontextmanager -# async def lifespan(app: FastAPI): -# alembic_cfg = Config("./alembic.ini") -# logger.info("Running alembic migrations") -# command.upgrade(alembic_cfg, "head") -# logger.info("Finished running alembic migrations") -# yield -# logger.warning("Shutting down") +def get_query_parameters( + listing_type: ListingType, + min_bedrooms: int = 1, + max_bedrooms: int = 999, + min_price: int = 0, + max_price: int = 10_000_000, + min_sqm: Optional[int] = None, + last_seen_days: Optional[int] = None, + let_date_available_from: Optional[datetime] = None, + furnish_types: Optional[str] = None, # comma-separated list +) -> QueryParameters: + """Parse query parameters into QueryParameters model.""" + parsed_furnish_types = None + if furnish_types: + parsed_furnish_types = [FurnishType(f.strip()) for f in furnish_types.split(",")] + + return QueryParameters( + listing_type=listing_type, + min_bedrooms=min_bedrooms, + max_bedrooms=max_bedrooms, + min_price=min_price, + max_price=max_price, + min_sqm=min_sqm, + last_seen_days=last_seen_days, + let_date_available_from=let_date_available_from, + furnish_types=parsed_furnish_types, + ) -# app = FastAPI(lifespan=lifespan) app = FastAPI() app.mount("/metrics", metrics_app) meter = get_meter(__name__) @@ -66,52 +79,121 @@ app.add_middleware( @app.get("/api/status") -async def get_status(): +async def get_status() -> dict[str, str]: request_counter.add(1, {"method": "GET", "path": "/status"}) hist.record(1.5, {"method": "GET", "path": "/status"}) return {"status": "OK"} @app.get("/api/listing") -async def get_listing(user: Annotated[User, Depends(get_current_user)]): +async def get_listing( + user: Annotated[User, Depends(get_current_user)], + limit: int = 5, +) -> dict[str, list]: + """Get listings from the database.""" repository = ListingRepository(engine) - listings = await repository.get_listings(limit=5) - logger.info(f"Fetched {len(listings)} listings") - return {"listings": listings} + result = await listing_service.get_listings(repository, limit=limit) + logger.info(f"Fetched {result.total_count} listings for {user.email}") + return {"listings": result.listings} @app.get("/api/listing_geojson") async def get_listing_geojson( user: Annotated[User, Depends(get_current_user)], - query_parameters: Annotated[QueryParameters, Query()], -): + query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)], + limit: int = 1000, # Default limit to prevent timeout +) -> dict: + """Get listings as GeoJSON for map display.""" repository = ListingRepository(engine) - geojson_data = await export_immoweb( - repository, query_parameters=query_parameters, limit=None + result = await export_service.export_to_geojson( + repository, + query_parameters=query_parameters, + limit=limit, + ) + return result.data + + +@app.get("/api/listing_geojson/stream") +async def stream_listing_geojson( + user: Annotated[User, Depends(get_current_user)], + query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)], + batch_size: int = 50, + limit: int = 1000, +) -> StreamingResponse: + """Stream listings as NDJSON for progressive map loading. + + Returns newline-delimited JSON with three message types: + - metadata: Initial message with batch_size and total_expected count + - batch: Array of GeoJSON features + - complete: Final message with total count + """ + async def generate(): + repository = ListingRepository(engine) + + # Phase 1: Fast count for progress estimation + total = repository.count_listings(query_parameters) + effective_total = min(limit, total) if limit else total + + yield json.dumps({ + "type": "metadata", + "batch_size": batch_size, + "total_expected": effective_total, + }) + "\n" + + # Phase 2: Stream with column projection and keyset pagination + count = 0 + batch = [] + for row in repository.stream_listings_optimized( + query_parameters, limit=limit, page_size=batch_size + ): + feature = convert_row_to_geojson(row) + batch.append(feature) + count += 1 + + if len(batch) >= batch_size: + yield json.dumps({"type": "batch", "features": batch}) + "\n" + batch = [] + + # Send remaining + if batch: + yield json.dumps({"type": "batch", "features": batch}) + "\n" + + # Final message + yield json.dumps({"type": "complete", "total": count}) + "\n" + + return StreamingResponse( + generate(), + media_type="application/x-ndjson", + headers={ + "Cache-Control": "no-cache", + "X-Accel-Buffering": "no", # Disable nginx buffering + } ) - return geojson_data @app.post("/api/refresh_listings") async def refresh_listings( user: Annotated[User, Depends(get_current_user)], - query_parameters: Annotated[QueryParameters, Query()], + query_parameters: Annotated[QueryParameters, Depends(get_query_parameters)], ) -> dict[str, str]: + """Trigger a background task to refresh listings.""" await send_notification( f"{user.email} refreshing listings with query parameters {query_parameters.model_dump_json()}" ) - # await listing_tasks.async_dump_listings_task(query_parameters.model_dump_json()) # Use this for local debugging - run task in sync - # return {} - # TODO: rate limit - expiry_time = datetime.now() + timedelta(minutes=10) - task = listing_tasks.dump_listings_task.apply_async( - args=(query_parameters.model_dump_json(),), - expires=expiry_time, + + repository = ListingRepository(engine) + result = await listing_service.refresh_listings( + repository, + query_parameters, + async_mode=True, + user_email=user.email, ) - redis_repository = RedisRepository.instance() - redis_repository.add_task_for_user(user, task.id) - return {"task_id": task.id} + # Track task for user + if result.task_id: + task_service.add_task_for_user(user.email, result.task_id) + + return {"task_id": result.task_id or "", "message": result.message} @app.get("/api/task_status") @@ -119,16 +201,12 @@ async def get_task_status( user: Annotated[User, Depends(get_current_user)], task_id: str, ) -> dict[str, str]: - task_result = listing_tasks.dump_listings_task.AsyncResult(task_id) - try: - result = json.dumps(task_result.result) - except Exception: - result = str(task_result.result) - + """Get the status of a background task.""" + status = task_service.get_task_status(task_id) return { - "task_id": task_id, - "status": task_result.status, - "result": result, + "task_id": status.task_id, + "status": status.status, + "result": json.dumps(status.result) if status.result else "", } @@ -136,16 +214,36 @@ async def get_task_status( async def get_tasks_for_user( user: Annotated[User, Depends(get_current_user)], ) -> list[str]: - redis_repository = RedisRepository.instance() - user_tasks = redis_repository.get_tasks_for_user(user) - return user_tasks + """Get all task IDs for the current user.""" + return task_service.get_user_tasks(user.email) + + +@app.post("/api/cancel_task") +async def cancel_task( + user: Annotated[User, Depends(get_current_user)], + task_id: str = Query(..., description="The task ID to cancel"), +) -> dict[str, str | bool]: + """Cancel a running task.""" + # Verify user owns this task + user_tasks = task_service.get_user_tasks(user.email) + if task_id not in user_tasks: + return {"success": False, "message": "Task not found or not owned by user"} + + try: + task_service.cancel_task(task_id) + logger.info(f"Task {task_id} cancelled by {user.email}") + return {"success": True, "message": "Task cancelled"} + except Exception as e: + logger.error(f"Failed to cancel task {task_id}: {e}") + return {"success": False, "message": str(e)} @app.get("/api/get_districts") async def get_districts( user: Annotated[User, Depends(get_current_user)], ) -> dict[str, str]: - return districts.get_districts() + """Get all available districts.""" + return district_service.get_all_districts() FastAPIInstrumentor.instrument_app(app) diff --git a/crawler/config/__init__.py b/crawler/config/__init__.py new file mode 100644 index 0000000..315e8c3 --- /dev/null +++ b/crawler/config/__init__.py @@ -0,0 +1,4 @@ +"""Configuration modules.""" +from config.schedule_config import ScheduleConfig, SchedulesConfig + +__all__ = ["ScheduleConfig", "SchedulesConfig"] diff --git a/crawler/config/schedule_config.py b/crawler/config/schedule_config.py new file mode 100644 index 0000000..cd00ebf --- /dev/null +++ b/crawler/config/schedule_config.py @@ -0,0 +1,122 @@ +"""Schedule configuration for periodic scraping tasks.""" +from __future__ import annotations + +import json +import logging +import os +import re +from typing import Self + +from pydantic import BaseModel, field_validator + +from models.listing import FurnishType, ListingType, QueryParameters + +logger = logging.getLogger("uvicorn.error") + +# Cron field validation patterns +CRON_MINUTE_PATTERN = re.compile(r"^(\*|([0-5]?\d)(,[0-5]?\d)*|\*/[1-9]\d*)$") +CRON_HOUR_PATTERN = re.compile(r"^(\*|(1?\d|2[0-3])(,(1?\d|2[0-3]))*|\*/[1-9]\d*)$") +CRON_DAY_OF_WEEK_PATTERN = re.compile(r"^(\*|[0-6](,[0-6])*|\*/[1-6])$") + + +class ScheduleConfig(BaseModel): + """Configuration for a single periodic scrape schedule.""" + + name: str + enabled: bool = True + minute: str = "0" + hour: str = "2" + day_of_week: str = "*" + listing_type: ListingType + min_bedrooms: int = 1 + max_bedrooms: int = 999 + min_price: int = 0 + max_price: int = 10_000_000 + district_names: list[str] = [] + furnish_types: list[str] | None = None + + @field_validator("minute") + @classmethod + def validate_minute(cls, v: str) -> str: + """Validate cron minute field (0-59, *, or */N).""" + if not CRON_MINUTE_PATTERN.match(v): + raise ValueError( + f"Invalid cron minute '{v}'. Must be 0-59, *, */N, or comma-separated values." + ) + return v + + @field_validator("hour") + @classmethod + def validate_hour(cls, v: str) -> str: + """Validate cron hour field (0-23, *, or */N).""" + if not CRON_HOUR_PATTERN.match(v): + raise ValueError( + f"Invalid cron hour '{v}'. Must be 0-23, *, */N, or comma-separated values." + ) + return v + + @field_validator("day_of_week") + @classmethod + def validate_day_of_week(cls, v: str) -> str: + """Validate cron day_of_week field (0-6, *, or */N).""" + if not CRON_DAY_OF_WEEK_PATTERN.match(v): + raise ValueError( + f"Invalid cron day_of_week '{v}'. Must be 0-6, *, */N, or comma-separated values." + ) + return v + + def to_query_parameters(self) -> QueryParameters: + """Convert schedule config to QueryParameters for the scrape task.""" + furnish_types_enum: list[FurnishType] | None = None + if self.furnish_types: + furnish_types_enum = [FurnishType(ft) for ft in self.furnish_types] + + return QueryParameters( + listing_type=self.listing_type, + min_bedrooms=self.min_bedrooms, + max_bedrooms=self.max_bedrooms, + min_price=self.min_price, + max_price=self.max_price, + district_names=set(self.district_names), + furnish_types=furnish_types_enum, + ) + + +class SchedulesConfig(BaseModel): + """Container for multiple schedule configurations.""" + + schedules: list[ScheduleConfig] = [] + + @classmethod + def from_env(cls, env_var: str = "SCRAPE_SCHEDULES") -> Self: + """Load schedules from environment variable. + + Args: + env_var: Name of the environment variable containing JSON config. + + Returns: + SchedulesConfig instance with parsed schedules. + + Raises: + ValueError: If the JSON is invalid or schedule validation fails. + """ + raw_value = os.environ.get(env_var, "").strip() + + if not raw_value: + logger.info(f"No {env_var} configured, no periodic scrapes will be scheduled") + return cls(schedules=[]) + + try: + parsed = json.loads(raw_value) + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON in {env_var}: {e}") from e + + if not isinstance(parsed, list): + raise ValueError(f"{env_var} must be a JSON array") + + schedules = [ScheduleConfig.model_validate(item) for item in parsed] + return cls(schedules=schedules) + + def get_enabled_schedules(self) -> list[ScheduleConfig]: + """Return only enabled schedules.""" + return [s for s in self.schedules if s.enabled] diff --git a/crawler/docker-compose.yml b/crawler/docker-compose.yml new file mode 100644 index 0000000..b071423 --- /dev/null +++ b/crawler/docker-compose.yml @@ -0,0 +1,105 @@ +version: "3.8" + +services: + redis: + image: redis:8 + container_name: rec-redis + ports: + - "6379:6379" + volumes: + - redis_data:/data + command: ["redis-server", "--appendonly", "yes"] + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + mysql: + image: mysql:9 + container_name: rec-mysql + ports: + - "3306:3306" + environment: + MYSQL_ROOT_PASSWORD: rootpass + MYSQL_DATABASE: wrongmove + MYSQL_USER: wrongmove + MYSQL_PASSWORD: wrongmove + volumes: + - mysql_data:/var/lib/mysql + healthcheck: + test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] + interval: 10s + timeout: 5s + retries: 5 + + app: + build: + context: . + dockerfile: Dockerfile + container_name: rec-app + ports: + - "5001:5001" + volumes: + # Bind mount source code for development + - .:/app + # Preserve virtual environment in container + - app_venv:/app/.venv + environment: + - ENV=dev + - DB_CONNECTION_STRING=mysql://wrongmove:wrongmove@mysql:3306/wrongmove + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - ROUTING_API_KEY=${ROUTING_API_KEY:-} + depends_on: + redis: + condition: service_healthy + mysql: + condition: service_healthy + command: ["uvicorn", "api.app:app", "--host", "0.0.0.0", "--port", "5001", "--reload", "--reload-dir", "api", "--reload-dir", "services", "--reload-dir", "repositories", "--reload-dir", "models"] + + celery: + build: + context: . + dockerfile: Dockerfile + container_name: rec-celery + volumes: + - .:/app + - app_venv:/app/.venv + environment: + - ENV=dev + - DB_CONNECTION_STRING=mysql://wrongmove:wrongmove@mysql:3306/wrongmove + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - ROUTING_API_KEY=${ROUTING_API_KEY:-} + - SCRAPE_SCHEDULES=${SCRAPE_SCHEDULES:-} + depends_on: + redis: + condition: service_healthy + mysql: + condition: service_healthy + command: ["celery", "-A", "celery_app", "worker", "--loglevel=info"] + + celery-beat: + build: + context: . + dockerfile: Dockerfile + container_name: rec-celery-beat + volumes: + - .:/app + - app_venv:/app/.venv + environment: + - ENV=dev + - DB_CONNECTION_STRING=mysql://wrongmove:wrongmove@mysql:3306/wrongmove + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - SCRAPE_SCHEDULES=${SCRAPE_SCHEDULES:-} + depends_on: + - redis + - celery + command: ["celery", "-A", "celery_app", "beat", "--loglevel=info"] + +volumes: + redis_data: + mysql_data: + app_venv: diff --git a/crawler/frontend/src/App.css b/crawler/frontend/src/App.css index 5efa2df..77e7d11 100644 --- a/crawler/frontend/src/App.css +++ b/crawler/frontend/src/App.css @@ -1,50 +1,14 @@ #root { - margin: 0 auto; - padding: 2rem; - text-align: center; -} - -.logo { - height: 6em; - padding: 1.5em; - will-change: filter; - transition: filter 300ms; -} - -.logo:hover { - filter: drop-shadow(0 0 2em #646cffaa); -} - -.logo.react:hover { - filter: drop-shadow(0 0 2em #61dafbaa); -} - -@keyframes logo-spin { - from { - transform: rotate(0deg); - } - - to { - transform: rotate(360deg); - } -} - -@media (prefers-reduced-motion: no-preference) { - a:nth-of-type(2) .logo { - animation: logo-spin infinite 20s linear; - } -} - -.card { - padding: 2em; -} - -.read-the-docs { - color: #888; + margin: 0; + padding: 0; + height: 100%; + overflow: hidden; } html, body { overflow: hidden; height: 100%; + margin: 0; + padding: 0; } diff --git a/crawler/frontend/src/App.tsx b/crawler/frontend/src/App.tsx index 621947b..08be85c 100644 --- a/crawler/frontend/src/App.tsx +++ b/crawler/frontend/src/App.tsx @@ -1,94 +1,38 @@ import type { User } from 'oidc-client-ts'; -import { useEffect, useState } from 'react'; +import { useEffect, useState, useRef, useCallback } from 'react'; import './App.css'; -import { AppSidebar } from './AppSidebar'; -import { getUser, handleCallback, logout } from './auth/authService'; -import ActiveQuery from './components/ActiveQuery'; +import { getUser, handleCallback } from './auth/authService'; import AlertError from './components/AlertError'; import LoginModal from './components/LoginModal'; import { Map } from './components/Map'; -import { Parameters, type ParameterValues } from './components/Parameters'; -import { Spinner } from './components/Spinner'; -import { Breadcrumb, BreadcrumbItem, BreadcrumbLink, BreadcrumbList, BreadcrumbPage, BreadcrumbSeparator } from './components/ui/breadcrumb'; +import { FilterPanel, type ParameterValues, DEFAULT_FILTER_VALUES } from './components/FilterPanel'; +import { Header } from './components/Header'; +import { StatsBar, type ViewMode } from './components/StatsBar'; +import { ListView } from './components/ListView'; +import { StreamingProgressBar } from './components/StreamingProgressBar'; +import { Sheet, SheetContent, SheetTrigger } from './components/ui/sheet'; import { Button } from './components/ui/button'; -import { Separator } from './components/ui/separator'; -import { SidebarInset, SidebarProvider, SidebarTrigger } from './components/ui/sidebar'; - -const fetchData = async (user: User, baseQueyrUri: string, parameters: ParameterValues, method: string = 'GET') => { - const accessToken = user.access_token; - const queryString = new URLSearchParams(); - queryString.append('listing_type', parameters.listing_type) - if (parameters.min_bedrooms) { - queryString.append('min_bedrooms', parameters.min_bedrooms.toString()); - } - if (parameters.max_bedrooms) { - queryString.append('max_bedrooms', parameters.max_bedrooms.toString()) - } - if (parameters.max_price) { - queryString.append("max_price", parameters.max_price.toString()); - } - if (parameters.min_price) { - queryString.append("min_price", parameters.min_price.toString()); - } - if (parameters.min_sqm) { - queryString.append("min_sqm", parameters.min_sqm.toString()); - } - if (parameters.last_seen_days) { - queryString.append("last_seen_days", parameters.last_seen_days.toString()); - } - if (parameters.available_from) { - queryString.append("let_date_available_from", parameters.available_from.toISOString()); - } - if (parameters.district) { - queryString.append("district_names", parameters.district); - } - - const response = await fetch(baseQueyrUri + '?' + queryString, - { - method: method, - headers: { - 'Authorization': `Bearer ${accessToken}`, // Pass the token - 'Content-Type': 'application/json', - }, - } - ); - if (!response.ok) { - throw new Error('Error: ' + response.status); - } - const data: Response = await response.json(); - return data; -}; - - -const fetchActiveTasksForUser = async (user: User) => { - const accessToken = user?.access_token; - const response = await fetch(`/api/tasks_for_user`, { - method: 'GET', - headers: { - 'Authorization': `Bearer ${accessToken}`, // Pass the token - 'Content-Type': 'application/json', - }, - }); - if (!response.ok) { - throw new Error(`Failed to fetch active tasks for user: ${response.status}`); - } - - const data = - await response.json(); - return data; -}; - - +import { Filter } from 'lucide-react'; +import type { GeoJSONFeatureCollection, PropertyProperties, PropertyFeature } from '@/types'; +import { refreshListings, fetchTasksForUser, streamListingGeoJSON, type StreamingProgress } from '@/services'; function App() { - const [listingData, setListingData] = useState({}); + const [listingData, setListingData] = useState(null); const [taskID, setTaskID] = useState(null); const [user, setUser] = useState(null); - const [isParametersModalOpen, setIsParametersModalOpen] = useState(true); const [queryParameters, setQueryParameters] = useState(null); const [submitError, setSubmitError] = useState(null); const [alertDialogIsOpen, setAlertDialogIsOpen] = useState(false); - const [spinnerText, setSpinnerText] = useState(null); + const [isLoading, setIsLoading] = useState(false); + const [viewMode, setViewMode] = useState('map'); + const [mobileFilterOpen, setMobileFilterOpen] = useState(false); + const [highlightedProperty, setHighlightedProperty] = useState(null); + const [streamingProgress, setStreamingProgress] = useState(null); + + // Ref to track accumulated features during streaming + const accumulatedFeaturesRef = useRef([]); + // Ref to track if initial load has been triggered + const initialLoadTriggeredRef = useRef(false); useEffect(() => { // Check if this is a callback from Authentik (after login) @@ -107,101 +51,233 @@ function App() { if (!user) { return; } - fetchActiveTasksForUser(user).then((tasks) => { - if (tasks) { - setTaskID(tasks[0]) + fetchTasksForUser(user).then((tasks) => { + if (tasks && tasks.length > 0) { + setTaskID(tasks[0]); } - }) - }, [user, taskID]) + }); + }, [user, taskID]); + + // Load listings function - used by both auto-load and manual submit + const loadListings = useCallback(async (parameters: ParameterValues) => { + if (!user) return; + + setQueryParameters(parameters); + setMobileFilterOpen(false); + setIsLoading(true); + accumulatedFeaturesRef.current = []; + setStreamingProgress({ count: 0 }); + setListingData(null); + + try { + for await (const batch of streamListingGeoJSON(user, parameters, (progress) => { + setStreamingProgress(progress); + })) { + accumulatedFeaturesRef.current.push(...batch); + setListingData({ + type: 'FeatureCollection', + features: [...accumulatedFeaturesRef.current] + }); + } + } catch (error) { + if (error instanceof Error) { + setSubmitError(error.message); + } else { + setSubmitError(String(error)); + } + setAlertDialogIsOpen(true); + } finally { + setIsLoading(false); + setStreamingProgress(null); + } + }, [user]); + + // Auto-load data with default filters when user is authenticated + useEffect(() => { + if (!user || initialLoadTriggeredRef.current) { + return; + } + initialLoadTriggeredRef.current = true; + + const defaultParams: ParameterValues = { + ...DEFAULT_FILTER_VALUES, + available_from: new Date(), + }; + + loadListings(defaultParams); + }, [user, loadListings]); if (!user) { - return + return ; } const onSubmit = async (action: 'fetch-data' | 'visualize', parameters: ParameterValues) => { - // Fetch listing data - setQueryParameters(parameters) - setIsParametersModalOpen(false) - let data = null; if (action === 'visualize') { - setSpinnerText("Loading data for visualization...") - try { - data = await fetchData(user, "/api/listing_geojson", parameters); - } catch (error) { - // @ts-expect-error - setSubmitError(error.message) - setAlertDialogIsOpen(true) - } finally { - setSpinnerText(null) - } - if (data) { - setListingData(data); - } + loadListings(parameters); } else if (action === 'fetch-data') { - setSpinnerText("Submitting query to refresh listings...") + setQueryParameters(parameters); + setMobileFilterOpen(false); + setIsLoading(true); try { - data = await fetchData(user, "/api/refresh_listings", parameters, 'POST'); - // @ts-expect-error - setTaskID(data.task_id) + const data = await refreshListings(user!, parameters); + setTaskID(data.task_id); } catch (error) { - // @ts-expect-error - setSubmitError(error.message) - setAlertDialogIsOpen(true) + if (error instanceof Error) { + setSubmitError(error.message); + } else { + setSubmitError(String(error)); + } + setAlertDialogIsOpen(true); } finally { - setSpinnerText(null) + setIsLoading(false); } } - console.log(data) - } + }; + + const handlePropertyClick = (property: PropertyProperties, _coordinates: [number, number]) => { + setHighlightedProperty(property.url); + // Optionally: pan map to coordinates + }; + + const renderMainContent = () => { + if (!listingData) { + return ( +
+
+ {isLoading ? ( + <> +
🏠
+

Loading Properties...

+

+ Fetching listings with default filters. You can adjust filters on the left. +

+ + ) : ( + <> +
🏠
+

Welcome to Property Explorer

+

+ Use the filters on the left to find properties. Apply filters to visualize existing data or refresh to fetch new listings. +

+ + )} +
+
+ ); + } + + if (listingData.features.length === 0) { + return ( +
+
+
🔍
+

No listings found

+

+ Try adjusting the filters or run a data refresh to fetch new listings. +

+
+
+ ); + } + + return ( + <> + {/* Map View */} + {(viewMode === 'map' || viewMode === 'split') && ( +
+ +
+ )} + + {/* List View */} + {(viewMode === 'list' || viewMode === 'split') && ( +
+ +
+ )} + + ); + }; + + const handleTaskCancelled = () => { + setTaskID(null); + }; return ( - <> - - - -
- - - - - - - Building Your Application - - - - - Data Fetching - - - -
-
-
-

Welcome, {user.profile.email}!

- - - - +
+ {/* Header */} +
- - {spinnerText} - + {/* Main content area */} +
+ {/* Filter Panel - Desktop (fixed sidebar) */} +
+ +
-
- {Object.keys(listingData).length > 0 && -
- -
- } + {/* Filter Panel - Mobile (sheet) */} +
+ + + + + + + + +
+ + {/* Main View Area */} +
+ {/* Streaming Progress Bar */} +
+
- - - - ) + + {/* Map/List Container */} +
+ {renderMainContent()} +
+ + {/* Stats Bar */} + {listingData && listingData.features.length > 0 && ( +
+ +
+ )} +
+
+ + {/* Error Dialog */} + +
+ ); } -export default App +export default App; diff --git a/crawler/frontend/src/assets/Map.css b/crawler/frontend/src/assets/Map.css index e65969f..db47184 100644 --- a/crawler/frontend/src/assets/Map.css +++ b/crawler/frontend/src/assets/Map.css @@ -1,55 +1,80 @@ #map-container { - /* position: absolute; */ + position: absolute; top: 0; bottom: 0; right: 0; left: 0; width: 100%; height: 100%; - flex: 1; - - /* position: 'relative'; */ - /* overflow: 'visible'; */ - /* isolation: 'isolate'; */ - } -.sidebar { - background-color: rgb(35 55 75 / 90%); - color: #fff; - padding: 6px 12px; - font-family: monospace; - z-index: 1; - position: absolute; - top: 0; - left: 0; - margin: 12px; - border-radius: 4px; -} - - #legend { - box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); line-height: 18px; - height: 310px; - width: 60px; - padding: 10px; + height: auto; + min-height: 300px; + width: 90px; + padding: 12px; position: absolute; - top: 0; - right: 0; - background: rgba(255, 255, 255, 0.8); - margin-top: 40px; - margin-right: 20px; - font-family: Arial, sans-serif; + top: 10px; + right: 10px; + background: rgba(255, 255, 255, 0.95); + border-radius: 8px; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; +} + +#legend .axis path, +#legend .axis line { + stroke: #e5e7eb; +} + +#legend .axis text { + fill: #6b7280; } .propertyListingPopupItem { - display: 'flex'; + display: flex; box-sizing: border-box; - border: 1px solid #aaa; justify-content: center; - font-family: sans-serif; - padding: 8px; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; + padding: 6px; width: 50%; - /* 2 columns */ +} + +/* Mapbox popup styling improvements */ +.mapboxgl-popup-content { + padding: 0 !important; + border-radius: 8px !important; + overflow: hidden; + box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15) !important; +} + +.mapboxgl-popup-close-button { + font-size: 20px; + padding: 4px 8px; + color: #6b7280; + z-index: 10; +} + +.mapboxgl-popup-close-button:hover { + background-color: #f3f4f6; + color: #111827; +} + +/* Improve marker visibility */ +.mapboxgl-marker { + cursor: pointer; +} + +/* Mobile adjustments */ +@media (max-width: 768px) { + #legend { + width: 75px; + padding: 8px; + min-height: 250px; + } + + .mapboxgl-popup-content { + max-width: 90vw !important; + } } diff --git a/crawler/frontend/src/components/ActiveQuery.tsx b/crawler/frontend/src/components/ActiveQuery.tsx index b23f58e..9d5af53 100644 --- a/crawler/frontend/src/components/ActiveQuery.tsx +++ b/crawler/frontend/src/components/ActiveQuery.tsx @@ -1,141 +1,128 @@ import { getUser } from '@/auth/authService'; +import { POLLING_INTERVALS } from '@/constants'; +import { fetchTaskStatus, cancelTask } from '@/services'; +import { TaskStatus, type TaskResult } from '@/types'; import type { User } from 'oidc-client-ts'; import React, { useEffect, useState } from 'react'; import AlertError from './AlertError'; import { Spinner } from './Spinner'; import { HoverCard, HoverCardContent, HoverCardTrigger } from './ui/hover-card'; import { Progress } from './ui/progress'; +import { Button } from './ui/button'; +import { X } from 'lucide-react'; -interface ModalProps { +interface ActiveQueryProps { taskID: string | null; + onTaskCancelled?: () => void; } -const fetchTaskStatusData = async (user: User, taskID: string) => { - const accessToken = user?.access_token; - const response = await fetch(`/api/task_status?task_id=${taskID}`, { - method: 'GET', - headers: { - 'Authorization': `Bearer ${accessToken}`, // Pass the token - 'Content-Type': 'application/json', - }, - }); - if (!response.ok) { - throw new Error(`Failed to fetch task status: ${response.status}`); - } - - const data = - await response.json(); - return data; -}; - -type TaskStatus = string -// enum TaskStatus { -// QUEUED = 'queued', -// PROCESSING = 'processing', -// COMPLETED = 'completed', -// FAILED = 'failed', -// } - -// const taskStatusToProgress = (taskStatus: TaskStatus): number => { -// switch (taskStatus) { -// case TaskStatus.QUEUED: -// return 0.33; // Queued status -// case TaskStatus.PROCESSING: -// return 0.66; // Processing status -// case TaskStatus.COMPLETED: -// return 1.0; // Completed status -// default: -// throw new Error('Unknown task status: ' + status); -// } -// } - -// const getTaskStatus = (status: string): TaskStatus => { -// switch (status.toLowerCase()) { -// case 'queued': -// return TaskStatus.QUEUED; -// case 'processing': -// return TaskStatus.PROCESSING; -// case 'completed': -// return TaskStatus.COMPLETED; -// case 'failed': -// return TaskStatus.FAILED; -// default: -// throw new Error('Unknown task status: ' + status); -// } -// }; - -const ActiveQuery: React.FC = ({ - taskID -}) => { +const ActiveQuery: React.FC = ({ taskID, onTaskCancelled }) => { const [user, setUser] = useState(null); useEffect(() => { getUser().then(setUser); }, []); const [progressPercentage, setProgressPercentage] = useState(0); - const [taskStatus, setTaskStatus] = useState("PENDING"); + const [taskStatus, setTaskStatus] = useState(TaskStatus.PENDING); const [lastUpdateTime, setLastUpdateTime] = useState(new Date()); const [fetchStatusError, setFetchStatusError] = useState(null); const [alertDialogIsOpen, setAlertDialogIsOpen] = useState(false); + const [isCancelling, setIsCancelling] = useState(false); - const fetchTaskStatus = async (interval: NodeJS.Timeout) => { - if (!user || !taskID) { - return; - } - let data = null + const handleCancelTask = async () => { + if (!user || !taskID || isCancelling) return; + + setIsCancelling(true); try { - data = await fetchTaskStatusData(user, taskID); - } catch (error: any) { - clearInterval(interval); - setTaskStatus("FAILURE") - setAlertDialogIsOpen(true) - if (error instanceof Error) { - setFetchStatusError(error.message) + const result = await cancelTask(user, taskID); + if (result.success) { + setTaskStatus(TaskStatus.REVOKED); + onTaskCancelled?.(); } else { - setFetchStatusError('Failed to update task status: ' + error.toString()) + setFetchStatusError(result.message); + setAlertDialogIsOpen(true); + } + } catch (error) { + setFetchStatusError(error instanceof Error ? error.message : 'Failed to cancel task'); + setAlertDialogIsOpen(true); + } finally { + setIsCancelling(false); + } + }; + + const pollTaskStatus = async (interval: NodeJS.Timeout) => { + if (!user || !taskID) { + return; + } + + try { + const data = await fetchTaskStatus(user, taskID); + setLastUpdateTime(new Date()); + const status = data.status as TaskStatus; + setTaskStatus(status); + + if (status === TaskStatus.FAILURE || status === TaskStatus.REVOKED) { + clearInterval(interval); + setFetchStatusError('Task failed with status: ' + status); + setAlertDialogIsOpen(true); + return; + } + + if (status === TaskStatus.SUCCESS) { + clearInterval(interval); + setProgressPercentage(100); + return; + } + + // Only parse result for in-progress tasks + if (data.result) { + try { + const parsedResult: TaskResult = JSON.parse(data.result); + setProgressPercentage(parsedResult.progress * 100); + } catch { + // Result parsing failed, but task is still running - ignore + } + } + } catch (error) { + clearInterval(interval); + setTaskStatus(TaskStatus.FAILURE); + setAlertDialogIsOpen(true); + if (error instanceof Error) { + setFetchStatusError(error.message); + } else { + setFetchStatusError('Failed to update task status: ' + String(error)); } - } - if (!data) { - clearInterval(interval); - return; - } - setLastUpdateTime(new Date()); - // const taskStatus = getTaskStatus(data.status); - const taskStatus = data.status; - setTaskStatus(taskStatus); - if (taskStatus === "FAILURE" || taskStatus === "REVOKED") { - clearInterval(interval); - throw new Error('Task failed. status: ' + taskStatus); - } - // const progress = taskStatusToProgress(taskStatus); - const parsedResult = JSON.parse(data.result) - setProgressPercentage(parsedResult.progress * 100); - if (taskStatus === "SUCCESS") { - clearInterval(interval); - return; } }; - // fetch status periodically - // maybe move to ws one day useEffect(() => { - const interval = setInterval - (() => fetchTaskStatus(interval), 5000); // every 5 seconds + const interval = setInterval( + () => pollTaskStatus(interval), + POLLING_INTERVALS.TASK_STATUS_MS + ); return () => clearInterval(interval); - }, [taskID]); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [taskID, user]); if (!taskID) { return null; } + const isInProgress = taskStatus && + taskStatus !== TaskStatus.SUCCESS && + taskStatus !== TaskStatus.FAILURE && + taskStatus !== TaskStatus.REVOKED; + return ( <> -
+
- - {taskStatus && <>Task status: {taskStatus} } - - {taskStatus && taskStatus !== 'SUCCESS' && taskStatus !== 'FAILURE' && taskStatus !== 'REVOKED' && } + +
+ {taskStatus && Task: {taskStatus}} + {isInProgress && } +
+
Task ID: {taskID} @@ -143,10 +130,22 @@ const ActiveQuery: React.FC = ({ Last updated: {lastUpdateTime.toLocaleString()}
+ {isInProgress && ( + + )}
- ) + ); }; export default ActiveQuery; diff --git a/crawler/frontend/src/components/FilterPanel.tsx b/crawler/frontend/src/components/FilterPanel.tsx new file mode 100644 index 0000000..16b39e6 --- /dev/null +++ b/crawler/frontend/src/components/FilterPanel.tsx @@ -0,0 +1,546 @@ +import { zodResolver } from "@hookform/resolvers/zod"; +import { useState, useEffect } from "react"; +import { useForm } from "react-hook-form"; +import { z } from "zod"; +import { Button } from "./ui/button"; +import { Calendar29 } from "./ui/DatePicker"; +import { Form, FormControl, FormDescription, FormField, FormItem, FormLabel, FormMessage } from "./ui/form"; +import { Input } from "./ui/input"; +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"; +import { Accordion, AccordionContent, AccordionItem, AccordionTrigger } from "./ui/accordion"; +import { Loader2, Filter, RefreshCw } from "lucide-react"; +import { ScrollArea } from "./ui/scroll-area"; + +export enum Metric { + qmprice = 'qmprice', + rooms = 'rooms', + qm = 'qm', + price = 'total_price', +} + +export enum ListingType { + RENT = 'RENT', + BUY = 'BUY' +} + +export enum FurnishType { + FURNISHED = 'furnished', + PART_FURNISHED = 'partFurnished', + UNFURNISHED = 'unfurnished', +} + +// Default filter values - exported so App.tsx can use them for initial load +export const DEFAULT_FILTER_VALUES = { + metric: Metric.qmprice, + listing_type: ListingType.RENT, + min_bedrooms: 1, + max_bedrooms: 3, + max_price: 3000, + min_price: 2000, + min_sqm: 50, + max_sqm: undefined, + min_price_per_sqm: undefined, + max_price_per_sqm: undefined, + last_seen_days: 28, + available_from: new Date(), + district: '', + furnish_types: [] as FurnishType[], +} as const; + +export interface ParameterValues { + metric: Metric + listing_type: ListingType + min_bedrooms?: number + max_bedrooms?: number + min_price?: number + max_price?: number + min_sqm?: number + max_sqm?: number + min_price_per_sqm?: number + max_price_per_sqm?: number + last_seen_days?: number + available_from?: Date + district: string + furnish_types?: FurnishType[] +} + +interface FilterPanelProps { + onSubmit: (action: 'fetch-data' | 'visualize', fromValues: ParameterValues) => void; + isLoading?: boolean; + listingCount?: number; +} + +const formSchema = z.object({ + metric: z.nativeEnum(Metric, { required_error: "Metric is required" }), + listing_type: z.nativeEnum(ListingType, { required_error: "Listing Type is required" }), + min_bedrooms: z.number().min(0).max(10).optional(), + max_bedrooms: z.number().min(0).max(10).optional(), + max_price: z.number().optional(), + min_price: z.number().min(0).optional(), + min_sqm: z.number().optional(), + max_sqm: z.number().optional(), + min_price_per_sqm: z.number().optional(), + max_price_per_sqm: z.number().optional(), + last_seen_days: z.number().min(0).optional(), + available_from: z.date(), + district: z.string(), + furnish_types: z.array(z.nativeEnum(FurnishType)).optional(), +}); + +type FormValues = z.infer; + +export function FilterPanel({ onSubmit, isLoading, listingCount }: FilterPanelProps) { + const [availableFromRawInput, setAvailableFromRawInput] = useState("now"); + const [selectedFurnishTypes, setSelectedFurnishTypes] = useState([]); + + const form = useForm({ + resolver: zodResolver(formSchema), + defaultValues: { + ...DEFAULT_FILTER_VALUES, + available_from: new Date(), // Fresh date on each render + }, + }); + + const handleFormSubmit = (action: 'fetch-data' | 'visualize') => { + return form.handleSubmit((values) => { + const params: ParameterValues = { + ...values, + furnish_types: selectedFurnishTypes.length > 0 ? selectedFurnishTypes : undefined, + }; + onSubmit(action, params); + })(); + }; + + const toggleFurnishType = (type: FurnishType) => { + setSelectedFurnishTypes(prev => + prev.includes(type) + ? prev.filter(t => t !== type) + : [...prev, type] + ); + }; + + // Count active filters + const countActiveFilters = (): number => { + const values = form.getValues(); + let count = 0; + if (values.min_bedrooms && values.min_bedrooms > 0) count++; + if (values.max_bedrooms && values.max_bedrooms < 10) count++; + if (values.min_price && values.min_price > 0) count++; + if (values.max_price) count++; + if (values.min_sqm && values.min_sqm > 0) count++; + if (values.max_sqm) count++; + if (values.min_price_per_sqm) count++; + if (values.max_price_per_sqm) count++; + if (values.district && values.district.length > 0) count++; + if (selectedFurnishTypes.length > 0) count++; + if (values.last_seen_days && values.last_seen_days < 365) count++; + return count; + }; + + const [activeFilterCount, setActiveFilterCount] = useState(0); + + useEffect(() => { + const subscription = form.watch(() => { + setActiveFilterCount(countActiveFilters()); + }); + return () => subscription.unsubscribe(); + }, [form, selectedFurnishTypes]); + + return ( +
+ {/* Header */} +
+
+ +

Filters

+ {activeFilterCount > 0 && ( + + {activeFilterCount} + + )} +
+ {listingCount !== undefined && ( +

+ {listingCount.toLocaleString()} listings +

+ )} +
+ + {/* Filters */} + +
+ + + {/* Visualization Options */} + + + Visualization + + +
+ ( + + Color by + + + + )} + /> + ( + + Type + + + + )} + /> +
+
+
+ + {/* Price & Size */} + + + Price & Size + + +
+
+ ( + + Min Price (£) + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + )} + /> + ( + + Max Price (£) + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + )} + /> +
+
+ ( + + Min Size (m²) + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + )} + /> + ( + + Max Size (m²) + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + )} + /> +
+
+ ( + + Min £/m² + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + )} + /> + ( + + Max £/m² + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + )} + /> +
+
+
+
+ + {/* Features */} + + + Features + + +
+
+ ( + + Min Beds + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + )} + /> + ( + + Max Beds + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + )} + /> +
+
+ Furnishing +
+ {[ + { value: FurnishType.FURNISHED, label: 'Furnished' }, + { value: FurnishType.PART_FURNISHED, label: 'Part' }, + { value: FurnishType.UNFURNISHED, label: 'Unfurn.' }, + ].map((option) => ( + + ))} +
+
+
+
+
+ + {/* Location */} + + + Location + + + ( + + District + + + + + Comma-separated list of districts + + + )} + /> + + + + {/* Availability */} + + + Availability + + +
+ ( + + Available From + + + + + Rental listings only + + + )} + /> + ( + + Last Seen (days) + + field.onChange(e.target.value ? Number(e.target.value) : undefined)} + /> + + + Show listings seen in last N days + + + )} + /> +
+
+
+
+
+ +
+ + {/* Action Buttons */} +
+ + +
+
+ ); +} diff --git a/crawler/frontend/src/components/Header.tsx b/crawler/frontend/src/components/Header.tsx new file mode 100644 index 0000000..b81b6d6 --- /dev/null +++ b/crawler/frontend/src/components/Header.tsx @@ -0,0 +1,80 @@ +import type { User } from 'oidc-client-ts'; +import { Button } from './ui/button'; +import { Separator } from './ui/separator'; +import { LogOut, Home, Filter } from 'lucide-react'; +import { logout } from '@/auth/authService'; +import { HealthIndicator } from './HealthIndicator'; +import { TaskIndicator } from './TaskIndicator'; + +interface HeaderProps { + user: User; + activeFilterCount?: number; + taskID?: string | null; + isLoading?: boolean; + onToggleFilters?: () => void; + showFilterToggle?: boolean; + onTaskCancelled?: () => void; +} + +export function Header({ + user, + activeFilterCount = 0, + taskID, + onToggleFilters, + showFilterToggle = false, + onTaskCancelled, +}: HeaderProps) { + return ( +
+ {/* Logo / Brand */} +
+ + Wrongmove +
+ + + + {/* Health Indicator */} + + + {/* Task Indicator */} + + + {/* Filter Toggle (mobile) */} + {showFilterToggle && ( + + )} + + {/* Spacer */} +
+ + {/* User Menu */} +
+ + {user.profile.email} + + +
+
+ ); +} diff --git a/crawler/frontend/src/components/HealthIndicator.tsx b/crawler/frontend/src/components/HealthIndicator.tsx new file mode 100644 index 0000000..d511b02 --- /dev/null +++ b/crawler/frontend/src/components/HealthIndicator.tsx @@ -0,0 +1,83 @@ +import { useEffect, useState } from 'react'; +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from './ui/tooltip'; +import { checkBackendHealth, type HealthStatus, type HealthCheckResult } from '@/services'; +import { Circle, Loader2 } from 'lucide-react'; + +interface HealthIndicatorProps { + /** How often to check health in milliseconds (default: 30000 = 30s) */ + interval?: number; +} + +export function HealthIndicator({ interval = 30000 }: HealthIndicatorProps) { + const [health, setHealth] = useState({ status: 'checking' }); + + useEffect(() => { + // Initial check + checkBackendHealth().then(setHealth); + + // Periodic checks + const intervalId = setInterval(() => { + checkBackendHealth().then(setHealth); + }, interval); + + return () => clearInterval(intervalId); + }, [interval]); + + const getStatusColor = (status: HealthStatus) => { + switch (status) { + case 'healthy': + return 'text-green-500'; + case 'unhealthy': + return 'text-red-500'; + case 'checking': + return 'text-muted-foreground'; + } + }; + + const getStatusLabel = (status: HealthStatus) => { + switch (status) { + case 'healthy': + return 'Connected'; + case 'unhealthy': + return 'Disconnected'; + case 'checking': + return 'Checking...'; + } + }; + + const getTooltipContent = () => { + if (health.status === 'checking') { + return 'Checking backend connection...'; + } + + if (health.status === 'healthy') { + return `Backend connected (${health.latencyMs}ms)`; + } + + return `Backend unavailable: ${health.error || 'Unknown error'}`; + }; + + return ( + + + +
+ {health.status === 'checking' ? ( + + ) : ( + + )} + +
+
+ +

{getTooltipContent()}

+
+
+
+ ); +} diff --git a/crawler/frontend/src/components/Map.tsx b/crawler/frontend/src/components/Map.tsx index 0b1337c..0bd0669 100644 --- a/crawler/frontend/src/components/Map.tsx +++ b/crawler/frontend/src/components/Map.tsx @@ -1,169 +1,241 @@ -// @ts-nocheck import crossfilter from "crossfilter2"; import * as d3 from "d3"; import mapboxgl from "mapbox-gl"; -import 'mapbox-gl/dist/mapbox-gl.css'; // this hides the map for some reason -import { useEffect, useRef } from "react"; +import 'mapbox-gl/dist/mapbox-gl.css'; +import { useEffect, useRef, useMemo, useCallback } from "react"; import { renderToString } from 'react-dom/server'; import "../assets/Map.css"; import { Metric, type ParameterValues } from "./Parameters"; -import { Button } from "./ui/button"; +import { PropertyCard } from "./PropertyCard"; import { ScrollArea } from "./ui/scroll-area"; -import { Separator } from "./ui/separator"; +import type { GeoJSONFeatureCollection, PropertyFeature, PropertyProperties } from "@/types"; +import { MAP_CONFIG, HEATMAP_CONFIG, PERCENTILE_CONFIG } from "@/constants"; +import { getColorSchemeForMetric, getMetricInterpretation } from "@/constants/colorSchemes"; +import { clone, percentile, calculateColorStops } from "@/utils/mapUtils"; -export function Map( - props: { - listingData: any; - queryParameters: ParameterValues | null; - } -) { +// Type declaration for the external HexgridHeatmap library +declare class HexgridHeatmap { + _tree: { + search: (bounds: { minX: number; maxX: number; minY: number; maxY: number }) => PropertyWithCoords[]; + }; + constructor(map: mapboxgl.Map, id: string, beforeLayer: string); + setIntensity(value: number): void; + setSpread(value: number): void; + setCellDensity(value: number): void; + setPropertyName(name: string): void; + setData(data: GeoJSONFeatureCollection): void; + setColorStops(stops: [number, string][]): void; + update(): void; +} + +interface PropertyWithCoords { + properties: PropertyProperties; +} + +interface CrossfilterRecord extends PropertyProperties { + index: number; +} + +interface MapProps { + listingData: GeoJSONFeatureCollection; + queryParameters: ParameterValues | null; + onPropertyClick?: (property: PropertyProperties, coordinates: [number, number]) => void; +} + +interface FilterState { + city: string; + country: string | null; + mode: string; + count?: number; +} + +export function Map(props: MapProps) { const data = props.listingData; - var crossData = data.features.map(function (d, i) { - //clone properties - var props = clone(d['properties']); - props['index'] = i; - return props; - }); - const cf = crossfilter(crossData); - const qmDim = cf.dimension(function (d) { return d.qm; }); - const cityDim = cf.dimension(function (d) { return d.city; }); - const countryDim = cf.dimension(function (d) { return d.country; }); - const rentDim = cf.dimension(function (d) { return d.total_price; }); - const roomsDim = cf.dimension(function (d) { return d.rooms; }); - const urlDim = cf.dimension(function (d) { return d.url; }); - const indexDim = cf.dimension(function (d) { return d.index; }); - let heatmap = null; - // rivet - var filter = { city: 'London', country: null, mode: Metric.qmprice }; - // filter['countries'] = Array.from(new Set(data.features.map(function (d) { return d['properties']['country'] }))); + const mapRef = useRef(null); + const mapContainerRef = useRef(null); + const heatmapRef = useRef(null); + const updateTimeoutRef = useRef(null); + const isMapLoadedRef = useRef(false); + const lastDataLengthRef = useRef(0); + + const filter: FilterState = { city: 'London', country: null, mode: Metric.qmprice }; if (props.queryParameters) { - filter['mode'] = props.queryParameters.metric; + filter.mode = props.queryParameters.metric; } - // rivets.bind(document.getElementById('overlay'), { filter: filter }); - const mapRef = useRef(mapboxgl.Map) - const mapContainerRef = useRef('map-container') - useEffect(() => { - mapboxgl.accessToken = 'pk.eyJ1IjoiZGktdG8iLCJhIjoiY2o0bnBoYXcxMW1mNzJ3bDhmc2xiNWttaiJ9.ZccatVk_4shzoAsEUXXecA'; - mapRef.current = new mapboxgl.Map({ - container: mapContainerRef.current, - style: 'mapbox://styles/mapbox/light-v9', - center: [13.38032, 49.994210], - zoom: 5 + + // Get appropriate color scheme based on metric + const colorScheme = useMemo(() => { + return getColorSchemeForMetric(filter.mode); + }, [filter.mode]); + + const metricInfo = useMemo(() => { + return getMetricInterpretation(filter.mode); + }, [filter.mode]); + + // Calculate average price per sqm for property cards + const avgPricePerSqm = useMemo(() => { + const validPrices = data.features + .map((f) => f.properties.qmprice) + .filter((p): p is number => typeof p === 'number' && p > 0); + return validPrices.length > 0 + ? validPrices.reduce((a, b) => a + b, 0) / validPrices.length + : 0; + }, [data]); + + // Build crossfilter data + const buildCrossfilterData = useCallback(() => { + return data.features.map(function (d: PropertyFeature, i: number) { + const propsCopy = clone(d.properties) as CrossfilterRecord; + propsCopy.index = i; + return propsCopy; }); - mapRef.current.on('load', function () { - update() - }) - mapRef.current.on('click', function (e) { - openListingsDialog(e.lngLat.lng, e.lngLat.lat); - }) - return () => { - mapRef.current.remove() + }, [data]); + + const updateHeatmap = useCallback(() => { + if (!mapRef.current || !isMapLoadedRef.current) return; + + const crossData = buildCrossfilterData(); + const cf = crossfilter(crossData); + const qmDim = cf.dimension(function (d: CrossfilterRecord) { return d.qm; }); + const cityDim = cf.dimension(function (d: CrossfilterRecord) { return d.city; }); + const countryDim = cf.dimension(function (d: CrossfilterRecord) { return d.country; }); + const indexDim = cf.dimension(function (d: CrossfilterRecord) { return d.index; }); + + // Create heatmap if it doesn't exist + if (!heatmapRef.current) { + heatmapRef.current = new HexgridHeatmap(mapRef.current, "hexgrid-heatmap", "waterway-label"); + heatmapRef.current.setIntensity(HEATMAP_CONFIG.INTENSITY); + heatmapRef.current.setSpread(HEATMAP_CONFIG.SPREAD); + heatmapRef.current.setCellDensity(HEATMAP_CONFIG.CELL_DENSITY); } - }, [data]) - - function clone(d) { - return JSON.parse(JSON.stringify(d)); - } - - function percentile(arr, p) { - if (arr.length === 0) return 0; - if (typeof p !== 'number') throw new TypeError('p must be a number'); - if (p <= 0) return arr[0]; - if (p >= 1) return arr[arr.length - 1]; - - var index = arr.length * p, - lower = Math.floor(index), - upper = lower + 1, - weight = index % 1; - - if (upper >= arr.length) return arr[lower]; - return arr[lower] * (1 - weight) + arr[upper] * weight; - } - - function update() { - // init heatmap - heatmap = new HexgridHeatmap(mapRef.current, "hexgrid-heatmap", "waterway-label"); - heatmap.setIntensity(9); // dunno yet - heatmap.setSpread(0.05); // dunno yet - heatmap.setCellDensity(0.5); // small value == bigger hexagons + const heatmap = heatmapRef.current; heatmap.setPropertyName(filter.mode); if (filter.mode === Metric.qmprice) { - // if we visualize sqm based data, remove properties where we have no data - qmDim.filter(function (d) { return d > 0; }); + qmDim.filter((d) => (d as number) > 0); } - - // set filter if (filter.city) { cityDim.filterExact(filter.city); } else if (filter.country) { countryDim.filterExact(filter.country); - } else { - alert('nothing loadable'); } - filter.count = cityDim.top(Infinity).length; - var subset = { "type": "FeatureCollection", "features": [] }; - indexDim.top(Infinity).forEach(function (i) { + const subset: GeoJSONFeatureCollection = { type: "FeatureCollection", features: [] }; + indexDim.top(Infinity).forEach(function (i: CrossfilterRecord) { subset.features.push(data.features[i.index]); }); - loadData(heatmap, subset); - } - - function loadData(heatmap, subset) { + // Update heatmap data heatmap.setData(subset); - var values = subset.features.map(function (d) { return d['properties'][filter.mode] }); - values = values.sort(function (a, b) { return a - b; }); - - // setting the color stops, min is at 5th percentile, max at 95percentile - var min = values[Math.round(values.length * 0.05)]; - var max = values[Math.round(values.length * 0.95)]; - var colorStopsPerc = [ - [0, "rgba(0,185,243,0)"], - [25, "rgba(0,185,243,0.24)"], - [60, "rgba(255,223,0,0.3)"], - [100, "rgba(255,105,0,0.3)"], - ]; - makeLegend(colorStopsPerc, min, max); - var colorStopsValue = colorStopsPerc.map(function (d) { - return [min + d[0] * (max - min) / 100, d[1]]; + let values = subset.features.map(function (d: PropertyFeature) { + return d.properties[filter.mode as keyof PropertyProperties] as number; }); + values = values.sort(function (a: number, b: number) { return a - b; }); + + const minIndex = Math.round(values.length * PERCENTILE_CONFIG.MIN_BOUND); + const maxIndex = Math.round(values.length * PERCENTILE_CONFIG.MAX_BOUND); + const min = values[minIndex]; + const max = values[maxIndex]; + + makeLegend(colorScheme, min, max); + const colorStopsValue = calculateColorStops(colorScheme, min, max); heatmap.setColorStops(colorStopsValue); heatmap.update(); - //get bounding box and zoom to that area - // we use a 1% percentile since some data can be corrupt - var longitudes = subset.features.map(function (d) { return d.geometry.coordinates[0]; }).sort(function (a, b) { return a - b; }); - var latitudes = subset.features.map(function (d) { return d.geometry.coordinates[1]; }).sort(function (a, b) { return a - b; }); - var minlng = percentile(longitudes, 0.01); - var maxlng = percentile(longitudes, 0.99); - var minlat = percentile(latitudes, 0.01); - var maxlat = percentile(latitudes, 0.99); - mapRef.current.fitBounds([ - [minlng, minlat], - [maxlng, maxlat] - ]); - } + // Fit bounds only on first load or significant data change + if (lastDataLengthRef.current === 0 && subset.features.length > 0) { + const longitudes = subset.features.map(function (d: PropertyFeature) { return d.geometry.coordinates[0]; }).sort(function (a: number, b: number) { return a - b; }); + const latitudes = subset.features.map(function (d: PropertyFeature) { return d.geometry.coordinates[1]; }).sort(function (a: number, b: number) { return a - b; }); + const minlng = percentile(longitudes, PERCENTILE_CONFIG.BOUNDS_CLIP_MIN); + const maxlng = percentile(longitudes, PERCENTILE_CONFIG.BOUNDS_CLIP_MAX); + const minlat = percentile(latitudes, PERCENTILE_CONFIG.BOUNDS_CLIP_MIN); + const maxlat = percentile(latitudes, PERCENTILE_CONFIG.BOUNDS_CLIP_MAX); - function makeLegend(colorstops, minValue, maxValue) { - /** - * colorstops: [[0, 'green'], [100, 'red']] - * @type {number} - */ - var svg_height = 300, svg_width = 70; - // clear svg before starting + mapRef.current?.fitBounds([ + [minlng, minlat], + [maxlng, maxlat] + ]); + } + + lastDataLengthRef.current = subset.features.length; + }, [data, filter.mode, filter.city, filter.country, colorScheme, buildCrossfilterData]); + + // Initialize map + useEffect(() => { + if (!mapContainerRef.current) return; + + mapboxgl.accessToken = MAP_CONFIG.MAPBOX_TOKEN; + mapRef.current = new mapboxgl.Map({ + container: mapContainerRef.current, + style: MAP_CONFIG.STYLE, + center: MAP_CONFIG.DEFAULT_CENTER, + zoom: MAP_CONFIG.DEFAULT_ZOOM + }); + mapRef.current.on('load', function () { + isMapLoadedRef.current = true; + lastDataLengthRef.current = 0; + updateHeatmap(); + }); + mapRef.current.on('click', function (e: mapboxgl.MapMouseEvent) { + openListingsDialog(e.lngLat.lng, e.lngLat.lat); + }); + return () => { + if (updateTimeoutRef.current) { + clearTimeout(updateTimeoutRef.current); + } + heatmapRef.current = null; + isMapLoadedRef.current = false; + mapRef.current?.remove(); + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + // Debounced update effect - only update after 200ms of no changes + useEffect(() => { + if (!isMapLoadedRef.current) return; + + // Clear any pending update + if (updateTimeoutRef.current) { + clearTimeout(updateTimeoutRef.current); + } + + // Schedule new update after 200ms of no changes + updateTimeoutRef.current = window.setTimeout(() => { + updateHeatmap(); + }, 200); + + return () => { + if (updateTimeoutRef.current) { + clearTimeout(updateTimeoutRef.current); + } + }; + }, [data, updateHeatmap]); + + function makeLegend(colorstops: [number, string][], minValue: number, maxValue: number) { + const svg_height = 280, svg_width = 80; d3.select('#svg').selectAll('*').remove(); - // create a new SVG element const svg = d3.select('#svg'); - var defs = svg + svg .attr('height', svg_height) .attr('width', svg_width); - var linearGradient = svg.append("defs") + // Add metric name at top + svg.append("text") + .attr("x", svg_width / 2) + .attr("y", 12) + .attr("text-anchor", "middle") + .attr("font-size", "11px") + .attr("font-weight", "600") + .attr("fill", "#374151") + .text(metricInfo.name); + + const gradientTop = 30; + const gradientHeight = svg_height - 70; + + const linearGradient = svg.append("defs") .append("linearGradient") .attr("id", "linear-gradient"); @@ -174,150 +246,104 @@ export function Map( .attr("y2", "0%"); svg.append("rect") - .attr("width", svg_width * 0.4) - .attr("height", svg_height) + .attr("x", 0) + .attr("y", gradientTop) + .attr("width", svg_width * 0.35) + .attr("height", gradientHeight) .attr('rx', 4) .style("fill", "url(#linear-gradient)"); - colorstops.forEach(function (d) { + colorstops.forEach(function (d: [number, string]) { linearGradient.append("stop") .attr("offset", d[0] + "%") .attr("stop-color", d[1]); }); - - var xScale = d3.scaleLinear().range([svg_height - 20, 0]).domain([minValue, maxValue]); - var xAxis = d3.axisRight(xScale).ticks(5); + const xScale = d3.scaleLinear().range([gradientHeight - 10, 0]).domain([minValue, maxValue]); + const xAxis = d3.axisRight(xScale).ticks(5).tickFormat((d) => { + const num = d as number; + if (num >= 1000) { + return `${(num / 1000).toFixed(1)}k`; + } + return String(Math.round(num)); + }); svg.append("g") .attr("class", "axis") - .attr("transform", "translate(" + svg_width / 2 + "," + (10) + ")") - .call(xAxis); + .attr("transform", "translate(" + (svg_width * 0.38) + "," + (gradientTop + 5) + ")") + .call(xAxis) + .selectAll("text") + .attr("font-size", "10px"); + + // Add interpretation labels at bottom + svg.append("text") + .attr("x", svg_width / 2) + .attr("y", svg_height - 25) + .attr("text-anchor", "middle") + .attr("font-size", "9px") + .attr("fill", "#22c55e") + .text(metricInfo.low); + + svg.append("text") + .attr("x", svg_width / 2) + .attr("y", svg_height - 10) + .attr("text-anchor", "middle") + .attr("font-size", "9px") + .attr("fill", "#ef4444") + .text(metricInfo.high); } function openListingsDialog(longitude: number, latitude: number) { - const searchBuffer = 0.001 // ~100m - const properties = heatmap._tree.search({ + if (!heatmapRef.current || !mapRef.current) return; + + const searchBuffer = HEATMAP_CONFIG.SEARCH_BUFFER; + const properties = heatmapRef.current._tree.search({ minX: longitude - searchBuffer, maxX: longitude + searchBuffer, minY: latitude - searchBuffer, maxY: latitude + searchBuffer - }) + }); if (properties.length > 0) { const listingDialogPopup = getListingDialog(properties); new mapboxgl.Popup() .setLngLat([longitude, latitude]) .setHTML(renderToString(listingDialogPopup)) - .setMaxWidth("500px") + .setMaxWidth("450px") .addTo(mapRef.current); } } - function getListingDialog(properties) { - let listingComponents = []; - for (let property of properties) { - listingComponents.push(getPropertyComponent(property)); - } - return -
- -
- Showing {properties.length} properties + function getListingDialog(properties: PropertyWithCoords[]) { + return ( + +
+
+ {properties.length} properties in this area +
+
+ {properties.map((property) => ( + + ))} +
- {listingComponents.map((item) => { - const scrollDiv =
- {item} - -
; - return scrollDiv - })} -
- ; + + ); } - function getPropertyComponent(property) { - const priceHistoryHTMLs = property.properties.price_history.map((d) => { - return
  • ${d.last_seen.split('T')[0]}: £${d.price}
  • ; - }); - - let priceHistoryHTML = <>; - if (priceHistoryHTMLs.length > 1) { - priceHistoryHTML = -
    - Price history: -
      - ${priceHistoryHTMLs.join('')} -
    -
    -
    - - } - const lastSeenStr = property.properties.last_seen.split('T')[0]; - const lastSeenDays = Math.round((new Date() - new Date(lastSeenStr)) / (1000 * 60 * 60 * 24)); - return
    -
    - - - -
    -
    - Available from: -
    -
    - {property.properties.available_from} -
    -
    - Price: -
    -
    - £{property.properties.total_price} -
    - {priceHistoryHTML} -
    - Rooms: -
    -
    - - {property.properties.rooms} -
    -
    - Area: -
    -
    - {property.properties.qm} m² -
    -
    - Price per area: -
    -
    - £{property.properties.qmprice}/m² -
    -
    - Last seen: -
    -
    - {lastSeenDays} days ago -
    -
    - Agency: -
    -
    - {property.properties.agency} -
    -
    - + return ( +
    +
    +
    +
    - } - - return <> -
    - -
    - - -
    - + ); } + +// Re-export types for backwards compatibility +export { Metric, type ParameterValues } from "./Parameters"; diff --git a/crawler/frontend/src/components/PropertyCard.tsx b/crawler/frontend/src/components/PropertyCard.tsx new file mode 100644 index 0000000..29ac5a0 --- /dev/null +++ b/crawler/frontend/src/components/PropertyCard.tsx @@ -0,0 +1,188 @@ +import { ExternalLink, Bed, Maximize2, PoundSterling, Clock, Building } from 'lucide-react'; +import { Button } from './ui/button'; +import type { PropertyProperties } from '@/types'; + +interface PropertyCardProps { + property: PropertyProperties; + variant?: 'compact' | 'full'; + isHighlighted?: boolean; + avgPricePerSqm?: number; + onClick?: () => void; +} + +export function PropertyCard({ + property, + variant = 'compact', + isHighlighted = false, + avgPricePerSqm, + onClick, +}: PropertyCardProps) { + const lastSeenDate = property.last_seen.split('T')[0]; + const lastSeenDays = Math.round((Date.now() - new Date(lastSeenDate).getTime()) / (1000 * 60 * 60 * 24)); + + // Determine if this is a good deal + const isGoodDeal = avgPricePerSqm && property.qmprice > 0 && property.qmprice < avgPricePerSqm * 0.9; + const isExpensive = avgPricePerSqm && property.qmprice > avgPricePerSqm * 1.1; + + const priceIndicator = isGoodDeal + ? { color: 'text-green-600 bg-green-50', label: 'Good deal' } + : isExpensive + ? { color: 'text-red-600 bg-red-50', label: 'Above avg' } + : null; + + const handleClick = () => { + window.open(property.url, '_blank', 'noopener,noreferrer'); + onClick?.(); + }; + + if (variant === 'compact') { + return ( +
    + {/* Thumbnail */} +
    + {property.photo_thumbnail && ( + Property + )} +
    + + {/* Details */} +
    +
    +
    + £{property.total_price.toLocaleString()} + /mo +
    + {priceIndicator && ( + + {priceIndicator.label} + + )} +
    + +
    + + + {property.rooms} + + + + {property.qm} m² + + + £{property.qmprice}/m² + +
    + +
    + + + {lastSeenDays}d ago + + {property.agency} +
    +
    +
    + ); + } + + // Full variant (for popup/detail view) + return ( +
    + {/* Header with image and price */} +
    + + {property.photo_thumbnail && ( + Property + )} + + +
    +
    +
    +
    + £{property.total_price.toLocaleString()} + /mo +
    + {priceIndicator && ( + + {priceIndicator.label} + + )} +
    +
    +
    +
    + + {/* Stats grid */} +
    +
    + + {property.rooms} bedrooms +
    +
    + + {property.qm} +
    +
    + + £{property.qmprice}/m² +
    +
    + + Available {property.available_from} +
    +
    + + {/* Agency and last seen */} +
    + + {property.agency} + + Seen {lastSeenDays} days ago +
    + + {/* Price history */} + {property.price_history.length > 1 && ( +
    +
    Price history
    +
    + {property.price_history.slice(0, 5).map((entry) => ( +
    + {entry.last_seen.split('T')[0]} + £{entry.price.toLocaleString()} +
    + ))} +
    +
    + )} + + {/* Actions */} + +
    + ); +} diff --git a/crawler/frontend/src/components/TaskIndicator.tsx b/crawler/frontend/src/components/TaskIndicator.tsx new file mode 100644 index 0000000..403692c --- /dev/null +++ b/crawler/frontend/src/components/TaskIndicator.tsx @@ -0,0 +1,166 @@ +import { getUser } from '@/auth/authService'; +import { POLLING_INTERVALS } from '@/constants'; +import { fetchTaskStatus, cancelTask } from '@/services'; +import { TaskStatus, type TaskResult } from '@/types'; +import type { User } from 'oidc-client-ts'; +import { useEffect, useState } from 'react'; +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from './ui/tooltip'; +import { Button } from './ui/button'; +import { Loader2, CheckCircle2, XCircle, X } from 'lucide-react'; + +interface TaskIndicatorProps { + taskID: string | null; + onTaskCancelled?: () => void; +} + +export function TaskIndicator({ taskID, onTaskCancelled }: TaskIndicatorProps) { + const [user, setUser] = useState(null); + const [progressPercentage, setProgressPercentage] = useState(0); + const [taskStatus, setTaskStatus] = useState(null); + const [isCancelling, setIsCancelling] = useState(false); + + useEffect(() => { + getUser().then(setUser); + }, []); + + useEffect(() => { + if (!user || !taskID) { + setTaskStatus(null); + return; + } + + // Reset state for new task + setTaskStatus(TaskStatus.PENDING); + setProgressPercentage(0); + + const pollTaskStatus = async () => { + try { + const data = await fetchTaskStatus(user, taskID); + const status = data.status as TaskStatus; + setTaskStatus(status); + + if (status === TaskStatus.SUCCESS) { + setProgressPercentage(100); + return true; // Stop polling + } + + if (status === TaskStatus.FAILURE || status === TaskStatus.REVOKED) { + return true; // Stop polling + } + + // Parse progress for in-progress tasks + if (data.result) { + try { + const parsedResult: TaskResult = JSON.parse(data.result); + setProgressPercentage(parsedResult.progress * 100); + } catch { + // Ignore parsing errors + } + } + return false; // Continue polling + } catch { + setTaskStatus(TaskStatus.FAILURE); + return true; // Stop polling on error + } + }; + + // Initial poll + pollTaskStatus(); + + const interval = setInterval(async () => { + const shouldStop = await pollTaskStatus(); + if (shouldStop) { + clearInterval(interval); + } + }, POLLING_INTERVALS.TASK_STATUS_MS); + + return () => clearInterval(interval); + }, [taskID, user]); + + const handleCancel = async () => { + if (!user || !taskID || isCancelling) return; + + setIsCancelling(true); + try { + const result = await cancelTask(user, taskID); + if (result.success) { + setTaskStatus(TaskStatus.REVOKED); + onTaskCancelled?.(); + } + } catch { + // Ignore cancel errors + } finally { + setIsCancelling(false); + } + }; + + if (!taskID || !taskStatus) { + return null; + } + + const isInProgress = taskStatus !== TaskStatus.SUCCESS && + taskStatus !== TaskStatus.FAILURE && + taskStatus !== TaskStatus.REVOKED; + + const getStatusIcon = () => { + if (isInProgress) { + return ; + } + if (taskStatus === TaskStatus.SUCCESS) { + return ; + } + return ; + }; + + const getTooltipContent = () => { + if (isInProgress) { + return `Task running: ${Math.round(progressPercentage)}%`; + } + if (taskStatus === TaskStatus.SUCCESS) { + return 'Task completed successfully'; + } + if (taskStatus === TaskStatus.REVOKED) { + return 'Task was cancelled'; + } + return 'Task failed'; + }; + + return ( + +
    + + +
    + {getStatusIcon()} + + {isInProgress ? `${Math.round(progressPercentage)}%` : taskStatus} + +
    +
    + +

    {getTooltipContent()}

    +

    ID: {taskID.slice(0, 8)}...

    +
    +
    + {isInProgress && ( + + + + + +

    Cancel task

    +
    +
    + )} +
    +
    + ); +} diff --git a/crawler/frontend/src/services/healthService.ts b/crawler/frontend/src/services/healthService.ts new file mode 100644 index 0000000..b5fb382 --- /dev/null +++ b/crawler/frontend/src/services/healthService.ts @@ -0,0 +1,74 @@ +// Health check service for backend connectivity + +export type HealthStatus = 'checking' | 'healthy' | 'unhealthy'; + +export interface HealthCheckResult { + status: HealthStatus; + latencyMs?: number; + error?: string; +} + +/** + * Check backend health by calling the /api/status endpoint + */ +export async function checkBackendHealth(): Promise { + const startTime = performance.now(); + + try { + const response = await fetch('/api/status', { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + }, + // Short timeout for health checks + signal: AbortSignal.timeout(5000), + }); + + const latencyMs = Math.round(performance.now() - startTime); + + if (!response.ok) { + return { + status: 'unhealthy', + latencyMs, + error: `HTTP ${response.status}`, + }; + } + + const data = await response.json(); + if (data.status === 'OK') { + return { + status: 'healthy', + latencyMs, + }; + } + + return { + status: 'unhealthy', + latencyMs, + error: 'Unexpected response', + }; + } catch (error) { + const latencyMs = Math.round(performance.now() - startTime); + + if (error instanceof Error) { + if (error.name === 'TimeoutError' || error.name === 'AbortError') { + return { + status: 'unhealthy', + latencyMs, + error: 'Request timeout', + }; + } + return { + status: 'unhealthy', + latencyMs, + error: error.message, + }; + } + + return { + status: 'unhealthy', + latencyMs, + error: 'Connection failed', + }; + } +} diff --git a/crawler/frontend/src/services/taskService.ts b/crawler/frontend/src/services/taskService.ts new file mode 100644 index 0000000..cd94bf3 --- /dev/null +++ b/crawler/frontend/src/services/taskService.ts @@ -0,0 +1,43 @@ +// Task service for fetching task status + +import type { User } from 'oidc-client-ts'; +import type { TaskStatusResponse } from '@/types'; +import { apiRequest } from './apiClient'; +import { API_ENDPOINTS } from '@/constants'; + +export interface CancelTaskResponse { + success: boolean; + message: string; +} + +/** + * Fetch all active tasks for the current user + */ +export async function fetchTasksForUser(user: User): Promise { + return apiRequest(user, API_ENDPOINTS.TASKS_FOR_USER); +} + +/** + * Fetch the status of a specific task + */ +export async function fetchTaskStatus( + user: User, + taskId: string +): Promise { + return apiRequest(user, API_ENDPOINTS.TASK_STATUS, { + params: { task_id: taskId }, + }); +} + +/** + * Cancel a running task + */ +export async function cancelTask( + user: User, + taskId: string +): Promise { + return apiRequest(user, API_ENDPOINTS.CANCEL_TASK, { + method: 'POST', + params: { task_id: taskId }, + }); +} diff --git a/crawler/tasks/listing_tasks.py b/crawler/tasks/listing_tasks.py index 3738d32..33afe45 100644 --- a/crawler/tasks/listing_tasks.py +++ b/crawler/tasks/listing_tasks.py @@ -1,35 +1,50 @@ import asyncio -import importlib import itertools import logging from typing import Any from celery import Task +from celery.schedules import crontab from celery_app import app +from config.schedule_config import SchedulesConfig from listing_processor import ListingProcessor -from models.listing import Listing, ListingType, QueryParameters +from models.listing import Listing, QueryParameters from rec.districts import get_districts from rec.query import listing_query from repositories.listing_repository import ListingRepository from database import engine - -dump_images_module = importlib.import_module("3_dump_images") -detect_floorplan_module = importlib.import_module("4_detect_floorplan") +from services import image_fetcher, floorplan_detector +from utils.redis_lock import redis_lock logger = logging.getLogger("uvicorn.error") +SCRAPE_LOCK_NAME = "scrape_listings" + @app.task(bind=True, pydantic=True) def dump_listings_task(self: Task, parameters_json: str) -> dict[str, Any]: - parsed_parameters = QueryParameters.model_validate_json(parameters_json) - self.update_state(state="Starting...", meta={"progress": 0}) - asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters)) - return {"progress": 0} + with redis_lock(SCRAPE_LOCK_NAME) as acquired: + if not acquired: + logger.warning("Another scrape job is already running, skipping this execution") + self.update_state(state="SKIPPED", meta={"reason": "Another scrape job is running"}) + return {"status": "skipped", "reason": "another_job_running"} + + logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}") + parsed_parameters = QueryParameters.model_validate_json(parameters_json) + self.update_state(state="Starting...", meta={"progress": 0}) + asyncio.run(dump_listings_full(task=self, parameters=parsed_parameters)) + return {"progress": 0} async def async_dump_listings_task(parameters_json: str) -> dict[str, Any]: - parsed_parameters = QueryParameters.model_validate_json(parameters_json) - await dump_listings_full(task=Task(), parameters=parsed_parameters) - return {"progress": 0} + with redis_lock(SCRAPE_LOCK_NAME) as acquired: + if not acquired: + logger.warning("Another scrape job is already running, skipping this execution") + return {"status": "skipped", "reason": "another_job_running"} + + logger.info(f"Acquired lock: {SCRAPE_LOCK_NAME}") + parsed_parameters = QueryParameters.model_validate_json(parameters_json) + await dump_listings_full(task=Task(), parameters=parsed_parameters) + return {"progress": 0} async def dump_listings_full( @@ -83,19 +98,27 @@ async def dump_listings_and_monitor( @app.on_after_finalize.connect def setup_periodic_tasks(sender, **kwargs): - sender.add_periodic_task( - 3600 * 24, # Daily updates - dump_listings_task.s( - QueryParameters( - listing_type=ListingType.RENT, - min_bedrooms=2, - max_bedrooms=3, - min_price=2000, - max_price=4000, - ).model_dump_json() - ), - name="Daily dump of interesting rent listings", - ) + """Register periodic tasks from environment configuration.""" + try: + config = SchedulesConfig.from_env() + except ValueError as e: + logger.error(f"Failed to load schedule configuration: {e}") + return + + for schedule in config.get_enabled_schedules(): + logger.info( + f"Registering periodic task: {schedule.name} at {schedule.hour}:{schedule.minute}" + ) + + sender.add_periodic_task( + crontab( + minute=schedule.minute, + hour=schedule.hour, + day_of_week=schedule.day_of_week, + ), + dump_listings_task.s(schedule.to_query_parameters().model_dump_json()), + name=schedule.name, + ) async def get_ids_to_process(