Add structured JSON logging, OTel business metrics, and Grafana dashboard
Structured logging via JsonFormatter replaces uvicorn's default log format so that Loki can parse timestamps and fields. Fourteen business metrics (scrape stats, throttle events, circuit breaker state, cache hit rate, OCR success rate, Celery task lifecycle) are defined in a shared metrics module and instrumented across the scraper pipeline, API, and workers. Celery workers expose a Prometheus HTTP endpoint on configurable ports.
This commit is contained in:
parent
a1829957c1
commit
d6edb747d2
12 changed files with 742 additions and 49 deletions
|
|
@ -23,15 +23,8 @@ from services.task_progress_publisher import publish_task_progress
|
|||
|
||||
logger = logging.getLogger("uvicorn.error")
|
||||
|
||||
# Also configure a celery-specific logger that always outputs to stdout
|
||||
# Central logging is now configured in celery_app.py via logging_config
|
||||
celery_logger = logging.getLogger("celery.task")
|
||||
if not celery_logger.handlers:
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
|
||||
))
|
||||
celery_logger.addHandler(handler)
|
||||
celery_logger.setLevel(logging.INFO)
|
||||
|
||||
SCRAPE_LOCK_NAME = "scrape_listings"
|
||||
LOG_BUFFER_MAX_LINES = 200
|
||||
|
|
@ -574,6 +567,22 @@ async def _dump_listings_full_inner(
|
|||
)
|
||||
celery_logger.info("=" * 60)
|
||||
|
||||
# Record scrape metrics
|
||||
from api.metrics import (
|
||||
scrape_listings_found,
|
||||
scrape_listings_processed,
|
||||
scrape_listings_failed,
|
||||
scrape_duration_seconds,
|
||||
scrape_pages_fetched,
|
||||
scrape_subqueries_total as scrape_subqueries_metric,
|
||||
)
|
||||
scrape_listings_found.add(state.ids_collected)
|
||||
scrape_listings_processed.add(state.processed_count)
|
||||
scrape_listings_failed.add(state.failed_count)
|
||||
scrape_duration_seconds.record(elapsed)
|
||||
scrape_pages_fetched.add(state.total_pages_fetched)
|
||||
scrape_subqueries_metric.add(state.completed_subqueries)
|
||||
|
||||
invalidate_cache()
|
||||
|
||||
_update_task_state(task, "Completed", {
|
||||
|
|
|
|||
|
|
@ -14,14 +14,8 @@ from services.task_progress_publisher import publish_task_progress
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Central logging is now configured in celery_app.py via logging_config
|
||||
celery_logger = logging.getLogger("celery.task")
|
||||
if not celery_logger.handlers:
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] %(name)s: %(message)s"
|
||||
))
|
||||
celery_logger.addHandler(handler)
|
||||
celery_logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
@app.task(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue