Add structured JSON logging, OTel business metrics, and Grafana dashboard
Structured logging via JsonFormatter replaces uvicorn's default format so Loki can parse timestamps and fields. 14 business metrics (scrape stats, throttle events, circuit breaker state, cache hit rate, OCR success rate, Celery task lifecycle) are defined in a shared metrics module and instrumented across the scraper pipeline, API, and workers. Celery workers expose a Prometheus HTTP endpoint on configurable ports.
This commit is contained in:
parent
a1829957c1
commit
d6edb747d2
12 changed files with 742 additions and 49 deletions
|
|
@ -1,10 +1,16 @@
|
|||
import sys
|
||||
import time
|
||||
from celery import Celery
|
||||
from celery.signals import worker_ready, task_prerun, task_postrun
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
from logging_config import configure_logging
|
||||
|
||||
load_dotenv()
|
||||
|
||||
configure_logging(os.getenv("SERVICE_NAME", "celery-worker"))
|
||||
|
||||
app = Celery(
|
||||
"celery_app",
|
||||
broker=os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0"),
|
||||
|
|
@ -20,6 +26,51 @@ app.conf.update(
|
|||
enable_utc=True,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Celery metrics via prometheus_client
|
||||
# ---------------------------------------------------------------------------
|
||||
# TCP port handed to the Prometheus HTTP server; read from the
# CELERY_METRICS_PORT environment variable, defaulting to 9090.
CELERY_METRICS_PORT = int(os.environ.get("CELERY_METRICS_PORT", "9090"))

# Maps task id -> time.monotonic() reading taken in the prerun handler, so
# the postrun handler can compute how long the task ran.
_task_start_times: dict[str, float] = {}
|
||||
|
||||
|
||||
@worker_ready.connect
def _start_metrics_server(**kwargs: object) -> None:
    """Initialise metrics and start the Prometheus HTTP server.

    Connected to Celery's ``worker_ready`` signal. Calls ``init_metrics``
    with the ``SERVICE_NAME`` environment variable (falling back to
    ``"celery-worker"``) and then starts ``prometheus_client``'s HTTP
    server on ``CELERY_METRICS_PORT``.

    Args:
        **kwargs: Arguments supplied by the signal dispatcher; unused.
    """
    # Imports are resolved when the signal fires rather than at module load.
    from api.metrics import init_metrics

    service_name = os.getenv("SERVICE_NAME", "celery-worker")
    init_metrics(service_name)

    from prometheus_client import start_http_server

    start_http_server(CELERY_METRICS_PORT)
|
||||
|
||||
|
||||
@task_prerun.connect
def _on_task_prerun(task_id: str, task: object, **kwargs: object) -> None:
    """Count a task as active and remember when it started.

    Connected to Celery's ``task_prerun`` signal.

    Args:
        task_id: Identifier of the task about to run; used as the key under
            which the start timestamp is stored.
        task: The task object; its ``name`` attribute labels the metric,
            with ``"unknown"`` used when the attribute is absent.
        **kwargs: Remaining signal arguments; unused.
    """
    from api.metrics import celery_tasks_active

    attributes = {"task_name": getattr(task, "name", "unknown")}
    celery_tasks_active.add(1, attributes)

    # Monotonic clock: only the difference taken in the postrun handler matters.
    _task_start_times[task_id] = time.monotonic()
|
||||
|
||||
|
||||
@task_postrun.connect
def _on_task_postrun(
    task_id: str, task: object, state: str | None = None, **kwargs: object
) -> None:
    """Record completion metrics for a task that has just finished.

    Connected to Celery's ``task_postrun`` signal. Decrements the active-task
    counter, increments the per-status task total, and, when a start time
    was stored for ``task_id``, records the elapsed duration.

    Args:
        task_id: Identifier of the finished task; used to look up (and
            remove) its stored start timestamp.
        task: The task object; its ``name`` attribute labels the metrics,
            with ``"unknown"`` used when the attribute is absent.
        state: Final task state reported by the signal; a missing or empty
            value is reported as ``"UNKNOWN"``.
        **kwargs: Remaining signal arguments; unused.
    """
    from api.metrics import (
        celery_tasks_total,
        celery_task_duration_seconds,
        celery_tasks_active,
    )

    task_name = getattr(task, "name", "unknown")
    status = state if state else "UNKNOWN"

    celery_tasks_active.add(-1, {"task_name": task_name})
    celery_tasks_total.add(1, {"task_name": task_name, "status": status})

    # Remove the entry whether or not it is used, so the mapping does not
    # retain finished tasks.
    started_at = _task_start_times.pop(task_id, None)
    if started_at is None:
        # No start time was stored for this task id; nothing to measure.
        return

    elapsed = time.monotonic() - started_at
    celery_task_duration_seconds.record(elapsed, {"task_name": task_name})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
with app.connection() as conn:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue