Add structured JSON logging, OTel business metrics, and Grafana dashboard

Structured logging via JsonFormatter replaces uvicorn's default format so
Loki can parse timestamps and fields.  14 business metrics (scrape stats,
throttle events, circuit breaker state, cache hit rate, OCR success rate,
Celery task lifecycle) are defined in a shared metrics module and
instrumented across the scraper pipeline, API, and workers.  Celery
workers expose a Prometheus HTTP endpoint on configurable ports.
This commit is contained in:
Viktor Barzin 2026-02-14 10:59:12 +00:00
parent a1829957c1
commit d6edb747d2
No known key found for this signature in database
GPG key ID: 0EB088298288D958
12 changed files with 742 additions and 49 deletions

View file

@ -39,12 +39,13 @@ from services.listing_cache import (
from repositories.poi_repository import POIRepository
from repositories.user_repository import UserRepository
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from api.metrics import metrics_app
from opentelemetry.metrics import get_meter
from api.metrics import init_metrics, get_metrics_asgi_app, geojson_cache_operations
from logging_config import configure_logging
load_dotenv()
logger = logging.getLogger("uvicorn")
configure_logging("api")
logger = logging.getLogger(__name__)
DEFAULT_BATCH_SIZE = 50
_rate_limit_config = RateLimitConfig.from_env()
@ -99,16 +100,8 @@ app = FastAPI(
app.include_router(passkey_router)
app.include_router(poi_router)
app.include_router(ws_router)
app.mount("/metrics", metrics_app)
meter = get_meter(__name__)
request_counter = meter.create_counter(
name="custom_request_count",
description="Number of times /hello was called",
)
hist = meter.create_histogram(
name="custom_request_duration",
description="Duration of /hello requests in seconds",
)
init_metrics("realestate-crawler-api")
app.mount("/metrics", get_metrics_asgi_app())
# Allow CORS (for React frontend)
@ -146,8 +139,6 @@ async def unhandled_exception_handler(request: Request, exc: Exception) -> JSONR
@app.get("/api/status")
async def get_status() -> dict[str, str]:
request_counter.add(1, {"method": "GET", "path": "/status"})
hist.record(1.5, {"method": "GET", "path": "/status"})
return {"status": "OK"}
@ -333,8 +324,10 @@ async def stream_listing_geojson(
cached_count = get_cached_count(query_parameters)
if cached_count is not None and cached_count > 0 and not include_poi_distances:
geojson_cache_operations.add(1, {"result": "hit"})
generator = _stream_from_cache(query_parameters, batch_size, limit)
else:
geojson_cache_operations.add(1, {"result": "miss"})
generator = _stream_from_db(
query_parameters, batch_size, limit, poi_distances_lookup,
skip_cache=include_poi_distances,