wrongmove/api/metrics.py

153 lines
5.3 KiB
Python
Raw Normal View History

"""OpenTelemetry metrics with Prometheus export.
Provides ``init_metrics()`` to lazily initialise the MeterProvider and all
business metric instruments. Safe to call from both the API and Celery
workers — the provider is created at most once per process.
"""
from __future__ import annotations
from opentelemetry.metrics import (
Counter,
Histogram,
Meter,
UpDownCounter,
get_meter,
set_meter_provider,
)
2025-08-02 17:25:56 +00:00
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
from opentelemetry.exporter.prometheus import PrometheusMetricReader
from prometheus_client import make_asgi_app
# Process-wide singletons populated by init_metrics(). ``_reader`` doubles as
# the "already initialised" flag: while it is None, init_metrics() has not run.
_reader: PrometheusMetricReader | None = None
# Meter from which every instrument below is created; None until init_metrics().
_meter: Meter | None = None
# NOTE: the instrument names below are annotation-only declarations. They carry
# no value at import time and are bound (via ``global``) inside init_metrics(),
# so reading them before init_metrics() has been called fails.
# ---------------------------------------------------------------------------
# Scrape metrics
# ---------------------------------------------------------------------------
# Total listings discovered during scrape runs.
scrape_listings_found: Counter
# Total listings successfully processed.
scrape_listings_processed: Counter
# Total listings that failed processing.
scrape_listings_failed: Counter
# Duration of a full scrape run in seconds.
scrape_duration_seconds: Histogram
# Total API pages fetched during scraping.
scrape_pages_fetched: Counter
# Total subqueries executed after query splitting.
scrape_subqueries_total: Counter
# ---------------------------------------------------------------------------
# Throttle / circuit-breaker metrics
# ---------------------------------------------------------------------------
# Total throttling events by type.
throttle_events_total: Counter
# circuit_breaker_state is registered as an ObservableGauge in circuit_breaker.py
# ---------------------------------------------------------------------------
# API / cache metrics
# ---------------------------------------------------------------------------
# GeoJSON cache operations (hit/miss).
geojson_cache_operations: Counter
# ---------------------------------------------------------------------------
# OCR metrics
# ---------------------------------------------------------------------------
# Total OCR detection attempts.
ocr_attempts: Counter
# Total OCR detections that found square meters.
ocr_successes: Counter
# ---------------------------------------------------------------------------
# Celery task metrics
# ---------------------------------------------------------------------------
# Total Celery tasks by name and status.
celery_tasks_total: Counter
# Duration of Celery tasks in seconds.
celery_task_duration_seconds: Histogram
# Currently active Celery tasks (incremented/decremented by callers).
celery_tasks_active: UpDownCounter
def init_metrics(service_name: str = "realestate-crawler") -> PrometheusMetricReader:
    """Initialise the OTel MeterProvider and define all instruments.

    Builds a ``MeterProvider`` backed by a ``PrometheusMetricReader``, installs
    it as the global provider, and binds every module-level instrument
    (counters, histograms, up/down counter) declared in this module.

    Args:
        service_name: Value stored under ``SERVICE_NAME`` in the provider's
            resource. Only the first call in a process has any effect.

    Returns:
        The ``PrometheusMetricReader`` so the API can mount the ASGI app.
        Calling this more than once is a no-op (returns the existing reader).
    """
    global _reader, _meter
    global scrape_listings_found, scrape_listings_processed, scrape_listings_failed
    global scrape_duration_seconds, scrape_pages_fetched, scrape_subqueries_total
    global throttle_events_total
    global geojson_cache_operations
    global ocr_attempts, ocr_successes
    global celery_tasks_total, celery_task_duration_seconds, celery_tasks_active

    # ``_reader`` is the "already initialised" marker for this process.
    if _reader is not None:
        return _reader

    _reader = PrometheusMetricReader()
    provider = MeterProvider(
        metric_readers=[_reader],
        resource=Resource.create({SERVICE_NAME: service_name}),
    )
    set_meter_provider(provider)

    # Take the meter from the provider built above rather than via the global
    # ``get_meter`` lookup: if some other code already installed a global
    # MeterProvider, ``set_meter_provider`` does not replace it, and a global
    # lookup would hand back a meter that is not connected to ``_reader``.
    _meter = provider.get_meter(__name__)

    # -- Scrape --
    scrape_listings_found = _meter.create_counter(
        "scrape_listings_found_total",
        description="Total listings discovered during scrape runs",
    )
    scrape_listings_processed = _meter.create_counter(
        "scrape_listings_processed_total",
        description="Total listings successfully processed",
    )
    scrape_listings_failed = _meter.create_counter(
        "scrape_listings_failed_total",
        description="Total listings that failed processing",
    )
    scrape_duration_seconds = _meter.create_histogram(
        "scrape_duration_seconds",
        description="Duration of a full scrape run in seconds",
    )
    scrape_pages_fetched = _meter.create_counter(
        "scrape_pages_fetched_total",
        description="Total API pages fetched during scraping",
    )
    scrape_subqueries_total = _meter.create_counter(
        "scrape_subqueries_total",
        description="Total subqueries executed after query splitting",
    )

    # -- Throttle --
    throttle_events_total = _meter.create_counter(
        "throttle_events_total",
        description="Total throttling events by type",
    )

    # -- Cache --
    geojson_cache_operations = _meter.create_counter(
        "geojson_cache_operations_total",
        description="GeoJSON cache operations (hit/miss)",
    )

    # -- OCR --
    ocr_attempts = _meter.create_counter(
        "ocr_attempts_total",
        description="Total OCR detection attempts",
    )
    ocr_successes = _meter.create_counter(
        "ocr_successes_total",
        description="Total OCR detections that found square meters",
    )

    # -- Celery --
    celery_tasks_total = _meter.create_counter(
        "celery_tasks_total",
        description="Total Celery tasks by name and status",
    )
    celery_task_duration_seconds = _meter.create_histogram(
        "celery_task_duration_seconds",
        description="Duration of Celery tasks in seconds",
    )
    celery_tasks_active = _meter.create_up_down_counter(
        "celery_tasks_active",
        description="Currently active Celery tasks",
    )

    return _reader
2025-08-02 17:25:56 +00:00
def get_metrics_asgi_app():  # type: ignore[no-untyped-def]
    """Build the Prometheus ASGI application intended for the /metrics mount.

    The app is created by ``prometheus_client.make_asgi_app()`` called with
    its default arguments; a new app object is built on every call.
    """
    prometheus_app = make_asgi_app()
    return prometheus_app