wrongmove/grafana/dashboard.json
Viktor Barzin d6edb747d2
Add structured JSON logging, OTel business metrics, and Grafana dashboard
Structured logging via JsonFormatter replaces uvicorn's default format so
Loki can parse timestamps and fields.  14 business metrics (scrape stats,
throttle events, circuit breaker state, cache hit rate, OCR success rate,
Celery task lifecycle) are defined in a shared metrics module and
instrumented across the scraper pipeline, API, and workers.  Celery
workers expose a Prometheus HTTP endpoint on configurable ports.
2026-02-14 10:59:12 +00:00

345 lines
9.7 KiB
JSON

{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"panels": [
{
  "id": 100,
  "type": "row",
  "title": "Scrape Overview",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 0,
    "w": 24,
    "h": 1
  }
},
{
  "id": 1,
  "title": "Scrape Duration (p50, 24h)",
  "description": "Median (p50) scrape duration estimated from the scrape_duration_seconds histogram over a trailing 24h window. Buckets are summed by le so that multiple exporting series collapse into a single value.",
  "type": "stat",
  "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
  "gridPos": { "h": 6, "w": 6, "x": 0, "y": 1 },
  "fieldConfig": {
    "defaults": {
      "unit": "s",
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "yellow", "value": 300 },
          { "color": "red", "value": 600 }
        ]
      }
    }
  },
  "options": {
    "reduceOptions": { "calcs": ["lastNotNull"] }
  },
  "targets": [
    {
      "expr": "histogram_quantile(0.5, sum(rate(scrape_duration_seconds_bucket[24h])) by (le))",
      "legendFormat": "p50"
    }
  ]
},
{
  "id": 2,
  "title": "Listings Found vs Processed",
  "description": "Listings found and processed over a trailing 1h window. Each counter is summed across all series so that every legend entry is a single line.",
  "type": "timeseries",
  "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
  "gridPos": { "h": 6, "w": 6, "x": 6, "y": 1 },
  "fieldConfig": { "defaults": { "unit": "short" } },
  "targets": [
    {
      "expr": "sum(increase(scrape_listings_found_total[1h]))",
      "legendFormat": "Found"
    },
    {
      "expr": "sum(increase(scrape_listings_processed_total[1h]))",
      "legendFormat": "Processed"
    }
  ]
},
{
  "id": 3,
  "title": "Failed Listings (Last Hour)",
  "description": "Increase of scrape_listings_failed_total over a trailing 1h window, summed across all series. Falls back to 0 when the counter has no samples in the window, so the panel shows 0 rather than 'No data'.",
  "type": "stat",
  "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
  "gridPos": { "h": 6, "w": 6, "x": 12, "y": 1 },
  "fieldConfig": {
    "defaults": {
      "unit": "short",
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "red", "value": 1 }
        ]
      }
    }
  },
  "options": {
    "reduceOptions": { "calcs": ["lastNotNull"] }
  },
  "targets": [
    {
      "expr": "sum(increase(scrape_listings_failed_total[1h])) or vector(0)",
      "legendFormat": "Failed"
    }
  ]
},
{
  "id": 4,
  "title": "Pages Fetched & Subqueries",
  "description": "Pages fetched and subqueries issued over a trailing 1h window. Each counter is summed across all series so that every legend entry is a single line.",
  "type": "timeseries",
  "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
  "gridPos": { "h": 6, "w": 6, "x": 18, "y": 1 },
  "fieldConfig": { "defaults": { "unit": "short" } },
  "targets": [
    {
      "expr": "sum(increase(scrape_pages_fetched_total[1h]))",
      "legendFormat": "Pages"
    },
    {
      "expr": "sum(increase(scrape_subqueries_total[1h]))",
      "legendFormat": "Subqueries"
    }
  ]
},
{
  "id": 101,
  "type": "row",
  "title": "Throttle & Circuit Breaker",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 7,
    "w": 24,
    "h": 1
  }
},
{
  "id": 5,
  "title": "Throttle Events by Type",
  "description": "Increase of throttle_events_total over a trailing 5m window, grouped by the type label so that each type is drawn as exactly one series.",
  "type": "timeseries",
  "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
  "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
  "fieldConfig": { "defaults": { "unit": "short" } },
  "targets": [
    {
      "expr": "sum(increase(throttle_events_total[5m])) by (type)",
      "legendFormat": "{{ type }}"
    }
  ]
},
{
  "id": 6,
  "title": "Circuit Breaker State",
  "type": "state-timeline",
  "datasource": {
    "uid": "${DS_PROMETHEUS}",
    "type": "prometheus"
  },
  "gridPos": {
    "x": 12,
    "y": 8,
    "w": 12,
    "h": 8
  },
  "fieldConfig": {
    "defaults": {
      "mappings": [
        {
          "type": "value",
          "options": {
            "0": { "color": "green", "text": "CLOSED" }
          }
        },
        {
          "type": "value",
          "options": {
            "1": { "color": "yellow", "text": "HALF_OPEN" }
          }
        },
        {
          "type": "value",
          "options": {
            "2": { "color": "red", "text": "OPEN" }
          }
        }
      ]
    }
  },
  "targets": [
    {
      "legendFormat": "State",
      "expr": "circuit_breaker_state"
    }
  ]
},
{
  "id": 102,
  "type": "row",
  "title": "API Performance",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 16,
    "w": 24,
    "h": 1
  }
},
{
  "id": 7,
  "title": "Request Rate by Endpoint",
  "type": "timeseries",
  "datasource": {
    "uid": "${DS_PROMETHEUS}",
    "type": "prometheus"
  },
  "gridPos": {
    "x": 0,
    "y": 17,
    "w": 8,
    "h": 8
  },
  "fieldConfig": {
    "defaults": {
      "unit": "reqps"
    }
  },
  "targets": [
    {
      "legendFormat": "{{ http_route }}",
      "expr": "sum(rate(http_server_duration_milliseconds_count[5m])) by (http_route)"
    }
  ]
},
{
  "id": 8,
  "title": "Latency Percentiles",
  "type": "timeseries",
  "datasource": {
    "uid": "${DS_PROMETHEUS}",
    "type": "prometheus"
  },
  "gridPos": {
    "x": 8,
    "y": 17,
    "w": 8,
    "h": 8
  },
  "fieldConfig": {
    "defaults": {
      "unit": "ms"
    }
  },
  "targets": [
    {
      "legendFormat": "p50",
      "expr": "histogram_quantile(0.50, sum(rate(http_server_duration_milliseconds_bucket[5m])) by (le))"
    },
    {
      "legendFormat": "p95",
      "expr": "histogram_quantile(0.95, sum(rate(http_server_duration_milliseconds_bucket[5m])) by (le))"
    },
    {
      "legendFormat": "p99",
      "expr": "histogram_quantile(0.99, sum(rate(http_server_duration_milliseconds_bucket[5m])) by (le))"
    }
  ]
},
{
  "id": 9,
  "title": "GeoJSON Cache Hit Rate",
  "type": "timeseries",
  "datasource": {
    "uid": "${DS_PROMETHEUS}",
    "type": "prometheus"
  },
  "gridPos": {
    "x": 16,
    "y": 17,
    "w": 8,
    "h": 8
  },
  "fieldConfig": {
    "defaults": {
      "unit": "percentunit"
    }
  },
  "targets": [
    {
      "legendFormat": "Hit Rate",
      "expr": "sum(rate(geojson_cache_operations_total{result=\"hit\"}[5m])) / sum(rate(geojson_cache_operations_total[5m]))"
    }
  ]
},
{
  "id": 103,
  "type": "row",
  "title": "Celery Tasks",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 25,
    "w": 24,
    "h": 1
  }
},
{
  "id": 10,
  "title": "Active Tasks",
  "type": "stat",
  "datasource": {
    "uid": "${DS_PROMETHEUS}",
    "type": "prometheus"
  },
  "gridPos": {
    "x": 0,
    "y": 26,
    "w": 6,
    "h": 8
  },
  "fieldConfig": {
    "defaults": {
      "unit": "short"
    }
  },
  "options": {
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ]
    }
  },
  "targets": [
    {
      "legendFormat": "Active",
      "expr": "sum(celery_tasks_active)"
    }
  ]
},
{
  "id": 11,
  "title": "Task Completion Rate",
  "type": "timeseries",
  "datasource": {
    "uid": "${DS_PROMETHEUS}",
    "type": "prometheus"
  },
  "gridPos": {
    "x": 6,
    "y": 26,
    "w": 6,
    "h": 8
  },
  "fieldConfig": {
    "defaults": {
      "unit": "short"
    }
  },
  "targets": [
    {
      "legendFormat": "{{ status }}",
      "expr": "sum(rate(celery_tasks_total[5m])) by (status)"
    }
  ]
},
{
  "id": 12,
  "title": "Task Duration (p50/p95)",
  "type": "timeseries",
  "datasource": {
    "uid": "${DS_PROMETHEUS}",
    "type": "prometheus"
  },
  "gridPos": {
    "x": 12,
    "y": 26,
    "w": 6,
    "h": 8
  },
  "fieldConfig": {
    "defaults": {
      "unit": "s"
    }
  },
  "targets": [
    {
      "legendFormat": "p50 {{ task_name }}",
      "expr": "histogram_quantile(0.50, sum(rate(celery_task_duration_seconds_bucket[5m])) by (le, task_name))"
    },
    {
      "legendFormat": "p95 {{ task_name }}",
      "expr": "histogram_quantile(0.95, sum(rate(celery_task_duration_seconds_bucket[5m])) by (le, task_name))"
    }
  ]
},
{
  "id": 13,
  "title": "OCR Success Rate",
  "description": "Ratio of ocr_successes_total to ocr_attempts_total over a trailing 1h window, each summed across all series. Empty when there were no attempts in the window.",
  "type": "timeseries",
  "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
  "gridPos": { "h": 8, "w": 6, "x": 18, "y": 26 },
  "fieldConfig": {
    "defaults": {
      "unit": "percentunit",
      "min": 0,
      "max": 1
    }
  },
  "targets": [
    {
      "expr": "sum(increase(ocr_successes_total[1h])) / sum(increase(ocr_attempts_total[1h]))",
      "legendFormat": "Success Rate"
    }
  ]
},
{
  "id": 104,
  "type": "row",
  "title": "Logs (Loki)",
  "collapsed": false,
  "gridPos": {
    "x": 0,
    "y": 34,
    "w": 24,
    "h": 1
  }
},
{
  "id": 14,
  "title": "Error Logs",
  "type": "logs",
  "datasource": {
    "uid": "${DS_LOKI}",
    "type": "loki"
  },
  "gridPos": {
    "x": 0,
    "y": 35,
    "w": 24,
    "h": 10
  },
  "options": {
    "sortOrder": "Descending",
    "dedupStrategy": "none",
    "showTime": true,
    "showLabels": true,
    "showCommonLabels": false,
    "wrapLogMessage": true,
    "prettifyLogMessage": true,
    "enableLogDetails": true
  },
  "targets": [
    {
      "legendFormat": "",
      "expr": "{job=\"realestate-crawler\"} | json | level = \"ERROR\""
    }
  ]
}
],
"refresh": "30s",
"schemaVersion": 39,
"tags": ["realestate-crawler", "monitoring"],
"templating": {
"list": [
{
  "name": "DS_PROMETHEUS",
  "type": "datasource",
  "query": "prometheus",
  "hide": 0,
  "current": {}
},
{
  "name": "DS_LOKI",
  "type": "datasource",
  "query": "loki",
  "hide": 0,
  "current": {}
}
]
},
"time": { "from": "now-24h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "Realestate Crawler",
"uid": "realestate-crawler",
"version": 1
}