Add structured JSON logging, OTel business metrics, and Grafana dashboard

Structured logging via JsonFormatter replaces uvicorn's default format so
Loki can parse timestamps and fields.  14 business metrics (scrape stats,
throttle events, circuit breaker state, cache hit rate, OCR success rate,
Celery task lifecycle) are defined in a shared metrics module and
instrumented across the scraper pipeline, API, and workers.  Celery
workers expose a Prometheus HTTP endpoint on configurable ports.
This commit is contained in:
Viktor Barzin 2026-02-14 10:59:12 +00:00
parent a1829957c1
commit d6edb747d2
No known key found for this signature in database
GPG key ID: 0EB088298288D958
12 changed files with 742 additions and 49 deletions

80
logging_config.py Normal file
View file

@@ -0,0 +1,80 @@
"""Centralized structured JSON logging configuration."""
from __future__ import annotations
import json
import logging
import sys
from datetime import datetime, timezone
from typing import Any
class JsonFormatter(logging.Formatter):
    """Format log records as single-line JSON objects for Loki ingestion.

    Every entry carries ``timestamp`` (ISO-8601, UTC), ``level``, ``logger``,
    ``message`` and ``service``.  Custom attributes attached through
    ``extra={...}`` on the logging call are merged in at the top level, and
    an ``exception`` field with the formatted traceback is added when the
    record carries exception info.
    """

    # Names that must never be treated as user-supplied "extra" fields: every
    # attribute a freshly constructed LogRecord has, plus the two that
    # ``logging.Formatter.format`` caches on a record ("message", "asctime")
    # when another handler formats the same record first.  Built once at
    # class creation instead of instantiating a throwaway LogRecord per call.
    _RESERVED_ATTRS = frozenset(
        logging.LogRecord("", 0, "", 0, "", (), None).__dict__
    ) | {"message", "asctime"}

    def __init__(self, service: str = "unknown") -> None:
        """Create a formatter that tags every entry with *service*."""
        super().__init__()
        self.service = service

    def format(self, record: logging.LogRecord) -> str:
        """Return *record* serialized as one JSON object on a single line.

        Values that ``json`` cannot serialize natively are converted with
        ``str`` rather than raising.
        """
        log_entry: dict[str, Any] = {
            "timestamp": datetime.fromtimestamp(
                record.created, tz=timezone.utc
            ).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "service": self.service,
        }

        # Merge fields passed via `extra={...}`.  Reserved LogRecord
        # attributes are skipped to keep the output clean, and the core keys
        # set above always win over a same-named extra.
        reserved = self._RESERVED_ATTRS
        for key, value in record.__dict__.items():
            if key not in reserved and key not in log_entry:
                log_entry[key] = value

        # exc_info may be a (None, None, None) tuple when no exception was
        # active, so check the exception instance itself.
        if record.exc_info and record.exc_info[1] is not None:
            log_entry["exception"] = self.formatException(record.exc_info)

        return json.dumps(log_entry, default=str)
class _ServiceFilter(logging.Filter):
    """Logging filter that stamps a fixed ``service`` name onto records.

    The filter never rejects anything; it only annotates each record it is
    asked about and lets it through.
    """

    def __init__(self, service: str) -> None:
        """Remember the *service* name to attach to records."""
        super().__init__()
        self.service = service

    def filter(self, record: logging.LogRecord) -> bool:
        """Set ``record.service`` and accept the record."""
        setattr(record, "service", self.service)
        return True
def configure_logging(service_name: str) -> None:
    """Route all logging through a single JSON handler on stdout.

    The root logger's existing handlers are dropped and replaced by one
    ``StreamHandler`` writing to ``sys.stdout`` with a ``JsonFormatter``, and
    the root level is set to ``INFO``.  The ``uvicorn``, ``uvicorn.error``,
    ``uvicorn.access`` and ``celery.task`` loggers have their own handlers
    removed and are set to propagate, so their records reach the root
    handler and are emitted as JSON as well.

    Args:
        service_name: Value recorded as the ``service`` field of every entry.
    """
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(JsonFormatter(service=service_name))
    # Attach the filter to the handler, not the root logger: logger-level
    # filters are skipped for records propagated from descendant loggers, so
    # a root-logger filter would miss uvicorn/celery/module loggers.  Because
    # the handler is recreated on every call, repeated configuration also
    # does not accumulate duplicate filters.
    handler.addFilter(_ServiceFilter(service_name))

    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(handler)
    root.setLevel(logging.INFO)

    # Make framework loggers propagate to root instead of using their own
    # handlers, so everything shares the JSON format.
    for framework_logger_name in (
        "uvicorn",
        "uvicorn.error",
        "uvicorn.access",
        "celery.task",
    ):
        framework_logger = logging.getLogger(framework_logger_name)
        framework_logger.handlers.clear()
        framework_logger.propagate = True