Fix Celery metrics not reaching Prometheus and update Grafana dashboard
Initialise OTel metrics at module level in celery_app.py so that prefork child processes inherit the MeterProvider and PrometheusMetricReader from the parent. Previously, worker_process_init created a separate MeterProvider in each child, disconnected from the HTTP server in the main process — so all scrape/celery/OCR metrics were silently lost. Update the Grafana dashboard with API Performance and Frontend Performance sections, synced from the live cluster dashboard.
This commit is contained in:
parent
bfee06525b
commit
67d4ab3821
2 changed files with 2510 additions and 250 deletions
|
|
@ -1,7 +1,7 @@
|
|||
import sys
|
||||
import time
|
||||
from celery import Celery
|
||||
from celery.signals import worker_ready, worker_process_init, task_prerun, task_postrun
|
||||
from celery.signals import worker_ready, task_prerun, task_postrun
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
|
|
@ -34,20 +34,17 @@ CELERY_METRICS_PORT = int(os.getenv("CELERY_METRICS_PORT", "9090"))
|
|||
# Track task start times for duration measurement
|
||||
_task_start_times: dict[str, float] = {}
|
||||
|
||||
|
||||
@worker_process_init.connect
|
||||
def _init_worker_metrics(**kwargs: object) -> None:
|
||||
"""Initialise OTel metrics in each prefork worker process."""
|
||||
from api.metrics import init_metrics
|
||||
init_metrics(os.getenv("SERVICE_NAME", "celery-worker"))
|
||||
# Initialise OTel metrics at module level so prefork children inherit the
|
||||
# MeterProvider and PrometheusMetricReader. The prometheus_client collectors
|
||||
# are registered in the default registry before fork, so child-process
|
||||
# recordings are visible to the HTTP server started in the main process.
|
||||
from api.metrics import init_metrics as _init_metrics # noqa: E402
|
||||
_init_metrics(os.getenv("SERVICE_NAME", "celery-worker"))
|
||||
|
||||
|
||||
@worker_ready.connect
|
||||
def _start_metrics_server(**kwargs: object) -> None:
|
||||
"""Start a lightweight Prometheus HTTP server in the main worker process."""
|
||||
from api.metrics import init_metrics
|
||||
init_metrics(os.getenv("SERVICE_NAME", "celery-worker"))
|
||||
|
||||
from prometheus_client import start_http_server
|
||||
start_http_server(CELERY_METRICS_PORT)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue