Add structured JSON logging, OTel business metrics, and Grafana dashboard
Structured logging via JsonFormatter replaces uvicorn's default format so Loki can parse timestamps and fields. 14 business metrics (scrape stats, throttle events, circuit breaker state, cache hit rate, OCR success rate, Celery task lifecycle) are defined in a shared metrics module and instrumented across the scraper pipeline, API, and workers. Celery workers expose a Prometheus HTTP endpoint on configurable ports.
This commit is contained in:
parent
a1829957c1
commit
d6edb747d2
12 changed files with 742 additions and 49 deletions
80
logging_config.py
Normal file
80
logging_config.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""Centralized structured JSON logging configuration."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
class JsonFormatter(logging.Formatter):
    """Render log records as single-line JSON objects for Loki ingestion.

    Every line carries ``timestamp`` (UTC, ISO 8601), ``level``, ``logger``,
    ``message`` and ``service``. Custom attributes attached to the record via
    ``extra={...}`` are merged in afterwards, and a formatted ``exception``
    field is added when the record carries exception information.
    """

    # Attribute names that belong to the logging machinery rather than to the
    # caller. Computed once at class creation instead of building a throwaway
    # LogRecord on every format() call. ``message`` and ``asctime`` are not
    # present on a fresh record, but the stdlib Formatter caches them on the
    # record, so they can show up when another formatter has already handled
    # the same record; logging also forbids them as ``extra`` keys.
    _RESERVED_ATTRS = frozenset(
        logging.LogRecord("", 0, "", 0, "", (), None).__dict__
    ) | {"message", "asctime"}

    def __init__(self, service: str = "unknown") -> None:
        """Create a formatter that stamps every entry with *service*."""
        super().__init__()
        self.service = service

    def format(self, record: logging.LogRecord) -> str:
        """Return *record* serialized as one JSON object on a single line.

        Values that ``json`` cannot serialize natively are converted with
        ``str`` so that an unusual ``extra`` value never aborts logging.
        """
        log_entry: dict[str, Any] = {
            "timestamp": datetime.fromtimestamp(
                record.created, tz=timezone.utc
            ).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
            "service": self.service,
        }

        # Merge fields passed via `extra={...}` on the log call. Reserved
        # attributes are skipped to keep the output clean, and the core keys
        # built above always win over a same-named record attribute.
        reserved = self._RESERVED_ATTRS
        log_entry.update(
            {
                key: value
                for key, value in record.__dict__.items()
                if key not in reserved and key not in log_entry
            }
        )

        if record.exc_info and record.exc_info[1] is not None:
            log_entry["exception"] = self.formatException(record.exc_info)

        return json.dumps(log_entry, default=str)
|
||||
|
||||
|
||||
class _ServiceFilter(logging.Filter):
    """Logging filter that tags each record it inspects with a service name.

    The filter never rejects anything: it only sets the ``service`` attribute
    on the record and lets it through.
    """

    def __init__(self, service: str) -> None:
        """Remember the *service* name that records will be tagged with."""
        super().__init__()
        self.service = service

    def filter(self, record: logging.LogRecord) -> bool:
        """Set ``record.service`` and accept the record unconditionally."""
        setattr(record, "service", self.service)
        return True
|
||||
|
||||
|
||||
def configure_logging(service_name: str) -> None:
    """Replace all handlers on the root logger with a single JSON stdout handler.

    The root logger is set to ``INFO``. Uvicorn's loggers and the Celery task
    logger have their own handlers removed and are set to propagate, so their
    records reach the root handler and are emitted as JSON as well.

    Args:
        service_name: Value written to the ``service`` field of every entry.
    """
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(JsonFormatter(service=service_name))
    # The service filter lives on the handler, not on the root logger:
    # logger-level filters are skipped for records propagated from descendant
    # loggers, whereas handler-level filters see every record the handler
    # emits. Because the handler is rebuilt on each call, repeated calls do
    # not pile up duplicate filters either.
    handler.addFilter(_ServiceFilter(service_name))

    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(handler)
    root.setLevel(logging.INFO)

    # Make framework loggers propagate to root instead of using their own
    # handlers, so their output goes through the JSON handler too.
    framework_loggers = (
        "uvicorn",
        "uvicorn.error",
        "uvicorn.access",
        "celery.task",
    )
    for logger_name in framework_loggers:
        framework_logger = logging.getLogger(logger_name)
        framework_logger.handlers.clear()
        framework_logger.propagate = True
|
||||
Loading…
Add table
Add a link
Reference in a new issue