openclaw: realtime usage dashboard via Prometheus exporter sidecar

Stdlib-only Python exporter reads ~/.openclaw/agents/*/sessions/*.jsonl
(assistant messages with usage) plus auth-profiles.json (OAuth expiry,
Plus-tier label) and exposes Prometheus text format on :9099/metrics.
Container is python:3.12-slim; pod template gets prometheus.io/scrape
annotations so the existing kubernetes-pods job picks it up — no
ServiceMonitor needed.

Metrics exported:
  openclaw_codex_messages_total{provider,model,session_kind}    counter
  openclaw_codex_{input,output,cache_read,cache_write}_tokens_total  counter
  openclaw_codex_message_errors_total{reason}                   counter
  openclaw_codex_active_sessions{kind}                          gauge
  openclaw_codex_oauth_expiry_seconds{provider,account,plan}    gauge
  openclaw_codex_last_run_timestamp                             gauge

Grafana dashboard "OpenClaw — Codex Usage" (Applications folder, 30s
refresh): messages/5h vs Plus rate-card, % of 1,200 floor, tokens/5h,
cache hit %, OAuth expiry days, active sessions, last-turn age, errors,
plus per-model timeseries + bar gauge + error table.

Plus rate-card thresholds in the gauge are conservative (1,200/5h floor;
real cap is dynamic 1,200–7,000). Re-baseline if throttling shows up
below 80%.
This commit is contained in:
Viktor Barzin 2026-05-07 09:04:25 +00:00
parent 4b39cb72da
commit ae70faf8be
4 changed files with 814 additions and 0 deletions

View file

@ -0,0 +1,476 @@
{
"annotations": {"list": []},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"refresh": "30s",
"schemaVersion": 38,
"tags": ["openclaw", "ai", "codex"],
"time": {"from": "now-6h", "to": "now"},
"timepicker": {},
"timezone": "",
"title": "OpenClaw — Codex Usage",
"uid": "openclaw-codex",
"version": 1,
"panels": [
{
"type": "row",
"id": 100,
"title": "Now",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0},
"collapsed": false,
"panels": []
},
{
"type": "stat",
"id": 1,
"title": "Messages last 5h — gpt-5.4-mini",
"description": "Plus rate-card lower bound: 1,200 / 5h. Hard cap at the upper bound: 7,000 / 5h.",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 5, "w": 6, "x": 0, "y": 1},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"decimals": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 960},
{"color": "orange", "value": 1500},
{"color": "red", "value": 5600}
]
},
"unit": "short"
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum(increase(openclaw_codex_messages_total{provider=\"openai-codex\",model=\"gpt-5.4-mini\"}[5h]))",
"refId": "A"
}
]
},
{
"type": "gauge",
"id": 2,
"title": "% of Plus 5h floor (1,200 cap)",
"description": "Conservative gauge against the lower bound of the published rate-card. Real ceiling depends on dynamic allocation (1,200–7,000). Re-baseline if you observe throttling at <80%.",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 5, "w": 6, "x": 6, "y": 1},
"options": {
"orientation": "auto",
"showThresholdLabels": false,
"showThresholdMarkers": true,
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}
},
"fieldConfig": {
"defaults": {
"min": 0,
"max": 100,
"decimals": 1,
"unit": "percent",
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 60},
{"color": "orange", "value": 80},
{"color": "red", "value": 95}
]
}
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "100 * sum(increase(openclaw_codex_messages_total{provider=\"openai-codex\",model=\"gpt-5.4-mini\"}[5h])) / 1200",
"refId": "A"
}
]
},
{
"type": "stat",
"id": 3,
"title": "Tokens last 5h (input + output, codex)",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 5, "w": 6, "x": 12, "y": 1},
"options": {
"colorMode": "value",
"graphMode": "area",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}
},
"fieldConfig": {
"defaults": {
"decimals": 0,
"unit": "short",
"thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum(increase(openclaw_codex_input_tokens_total{provider=\"openai-codex\"}[5h])) + sum(increase(openclaw_codex_output_tokens_total{provider=\"openai-codex\"}[5h]))",
"refId": "A"
}
]
},
{
"type": "stat",
"id": 4,
"title": "Cache hit ratio (codex, 5h)",
"description": "cacheRead / (cacheRead + input). Higher is better — caching cuts effective Plus quota burn.",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 5, "w": 6, "x": 18, "y": 1},
"options": {
"colorMode": "value",
"graphMode": "area",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}
},
"fieldConfig": {
"defaults": {
"min": 0,
"max": 100,
"decimals": 1,
"unit": "percent",
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "red", "value": null},
{"color": "yellow", "value": 30},
{"color": "green", "value": 60}
]
}
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "100 * sum(increase(openclaw_codex_cache_read_tokens_total{provider=\"openai-codex\"}[5h])) / clamp_min(sum(increase(openclaw_codex_input_tokens_total{provider=\"openai-codex\"}[5h])) + sum(increase(openclaw_codex_cache_read_tokens_total{provider=\"openai-codex\"}[5h])), 1)",
"refId": "A"
}
]
},
{
"type": "stat",
"id": 5,
"title": "OAuth token expiry",
"description": "Days until the openai-codex OAuth token expires. Re-run `openclaw models auth login --provider openai-codex` before this hits 0.",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 5, "w": 6, "x": 0, "y": 6},
"options": {
"colorMode": "background",
"graphMode": "none",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}
},
"fieldConfig": {
"defaults": {
"decimals": 1,
"unit": "d",
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "red", "value": null},
{"color": "orange", "value": 1},
{"color": "yellow", "value": 3},
{"color": "green", "value": 5}
]
}
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "max(openclaw_codex_oauth_expiry_seconds{provider=\"openai-codex\"}) / 86400",
"refId": "A"
}
]
},
{
"type": "stat",
"id": 6,
"title": "Active sessions",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 5, "w": 6, "x": 6, "y": 6},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": true},
"textMode": "value_and_name"
},
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "openclaw_codex_active_sessions",
"legendFormat": "{{kind}}",
"refId": "A"
}
]
},
{
"type": "stat",
"id": 7,
"title": "Last assistant turn",
"description": "Time since the latest assistant message landed in any session.",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 5, "w": 6, "x": 12, "y": 6},
"options": {
"colorMode": "background",
"graphMode": "none",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 1800},
{"color": "orange", "value": 7200},
{"color": "red", "value": 86400}
]
}
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "time() - openclaw_codex_last_run_timestamp",
"refId": "A"
}
]
},
{
"type": "stat",
"id": 8,
"title": "Errors last 24h",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 5, "w": 6, "x": 18, "y": 6},
"options": {
"colorMode": "background",
"graphMode": "area",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}
},
"fieldConfig": {
"defaults": {
"decimals": 0,
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 1},
{"color": "red", "value": 10}
]
}
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum(increase(openclaw_codex_message_errors_total[24h]))",
"refId": "A"
}
]
},
{
"type": "row",
"id": 200,
"title": "Over time",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 11},
"collapsed": false,
"panels": []
},
{
"type": "timeseries",
"id": 10,
"title": "Messages / min by model",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 12},
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"custom": {
"drawStyle": "bars",
"fillOpacity": 60,
"lineWidth": 1,
"stacking": {"mode": "normal"}
},
"unit": "short"
}
},
"options": {
"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["sum"]},
"tooltip": {"mode": "multi", "sort": "desc"}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum by (provider, model) (rate(openclaw_codex_messages_total[1m])) * 60",
"legendFormat": "{{provider}}/{{model}}",
"refId": "A"
}
]
},
{
"type": "timeseries",
"id": 11,
"title": "Tokens / min by type (codex)",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 20},
"fieldConfig": {
"defaults": {
"color": {"mode": "palette-classic"},
"custom": {
"drawStyle": "line",
"fillOpacity": 25,
"lineWidth": 2,
"stacking": {"mode": "none"}
},
"unit": "short"
}
},
"options": {
"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "desc"}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum(rate(openclaw_codex_input_tokens_total{provider=\"openai-codex\"}[5m])) * 60",
"legendFormat": "input",
"refId": "A"
},
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum(rate(openclaw_codex_output_tokens_total{provider=\"openai-codex\"}[5m])) * 60",
"legendFormat": "output",
"refId": "B"
},
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum(rate(openclaw_codex_cache_read_tokens_total{provider=\"openai-codex\"}[5m])) * 60",
"legendFormat": "cache_read",
"refId": "C"
}
]
},
{
"type": "bargauge",
"id": 12,
"title": "Messages / 5h by model",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 20},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"showUnfilled": true,
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}
},
"fieldConfig": {
"defaults": {
"min": 0,
"decimals": 0,
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 100},
{"color": "orange", "value": 500},
{"color": "red", "value": 1000}
]
}
}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum by (provider, model) (increase(openclaw_codex_messages_total[5h]))",
"legendFormat": "{{provider}}/{{model}}",
"refId": "A"
}
]
},
{
"type": "row",
"id": 300,
"title": "Errors",
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 28},
"collapsed": false,
"panels": []
},
{
"type": "table",
"id": 20,
"title": "Recent errors by model and reason",
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 29},
"options": {
"showHeader": true
},
"fieldConfig": {
"defaults": {
"custom": {"align": "auto", "displayMode": "auto"}
},
"overrides": [
{
"matcher": {"id": "byName", "options": "Value"},
"properties": [
{"id": "displayName", "value": "Errors (24h)"},
{"id": "custom.displayMode", "value": "color-background"},
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 1},
{"color": "red", "value": 10}
]
}
}
]
}
]
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "PBFA97CFB590B2093"},
"expr": "sum by (provider, model, reason) (increase(openclaw_codex_message_errors_total[24h])) > 0",
"format": "table",
"instant": true,
"refId": "A"
}
],
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {"Time": true, "__name__": true, "instance": true, "job": true, "namespace": true, "pod": true, "app": true},
"indexByName": {"provider": 0, "model": 1, "reason": 2, "Value": 3},
"renameByName": {}
}
}
]
}
]
}

View file

@ -134,6 +134,7 @@ locals {
# Applications
"qbittorrent.json" = "Applications"
"realestate-crawler.json" = "Applications"
"openclaw.json" = "Applications"
"uk-payslip.json" = "Finance (Personal)"
"wealth.json" = "Finance (Personal)"
"job-hunter.json" = "Finance"