Add structured JSON logging, OTel business metrics, and Grafana dashboard
Structured logging via JsonFormatter replaces uvicorn's default format so Loki can parse timestamps and fields. 14 business metrics (scrape stats, throttle events, circuit breaker state, cache hit rate, OCR success rate, Celery task lifecycle) are defined in a shared metrics module and instrumented across the scraper pipeline, API, and workers. Celery workers expose a Prometheus HTTP endpoint on configurable ports.
This commit is contained in:
parent
a1829957c1
commit
d6edb747d2
12 changed files with 742 additions and 49 deletions
|
|
@ -45,14 +45,17 @@ class ThrottleMetrics:
|
|||
def record_rate_limit(self) -> None:
    """Count one rate-limit error (HTTP 429).

    Bumps the in-memory tally first, then forwards the event to the
    throttle counter helper under the ``rate_limit`` type.
    """
    self.rate_limit_count = self.rate_limit_count + 1
    _increment_throttle_metric("rate_limit")
|
||||
|
||||
def record_service_unavailable(self) -> None:
    """Count one service-unavailable error (HTTP 503).

    Bumps the in-memory tally first, then forwards the event to the
    throttle counter helper under the ``service_unavailable`` type.
    """
    self.service_unavailable_count = self.service_unavailable_count + 1
    _increment_throttle_metric("service_unavailable")
|
||||
|
||||
def record_ip_blocked(self) -> None:
    """Count one IP-blocked error (HTTP 403).

    Bumps the in-memory tally first, then forwards the event to the
    throttle counter helper under the ``ip_blocked`` type.
    """
    self.ip_blocked_count = self.ip_blocked_count + 1
    _increment_throttle_metric("ip_blocked")
|
||||
|
||||
def record_slow_response(self, response_time: float) -> None:
|
||||
"""Record a slow response.
|
||||
|
|
@ -63,14 +66,17 @@ class ThrottleMetrics:
|
|||
self.slow_response_count += 1
|
||||
self.total_response_time += response_time
|
||||
self.total_requests += 1
|
||||
_increment_throttle_metric("slow_response")
|
||||
|
||||
def record_empty_response(self) -> None:
    """Count one unexpected empty response.

    Bumps the in-memory tally first, then forwards the event to the
    throttle counter helper under the ``empty_response`` type.
    """
    self.empty_response_count = self.empty_response_count + 1
    _increment_throttle_metric("empty_response")
|
||||
|
||||
def record_invalid_response(self) -> None:
    """Count one invalid or error response.

    Bumps the in-memory tally first, then forwards the event to the
    throttle counter helper under the ``invalid_response`` type.
    """
    self.invalid_response_count = self.invalid_response_count + 1
    _increment_throttle_metric("invalid_response")
|
||||
|
||||
def record_request(self, response_time: float) -> None:
|
||||
"""Record a successful request.
|
||||
|
|
@ -150,6 +156,15 @@ def reset_throttle_metrics() -> None:
|
|||
_global_metrics = ThrottleMetrics()
|
||||
|
||||
|
||||
def _increment_throttle_metric(event_type: str) -> None:
|
||||
"""Safely increment the OTel throttle counter if metrics are initialised."""
|
||||
try:
|
||||
from api.metrics import throttle_events_total
|
||||
throttle_events_total.add(1, {"type": event_type})
|
||||
except Exception:
|
||||
pass # Metrics not yet initialised (e.g. during tests)
|
||||
|
||||
|
||||
def validate_response(
|
||||
response: aiohttp.ClientResponse,
|
||||
response_time: float,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue