fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip]

6d224861 came from a --no-checkout worktree whose empty index made the
commit drop every file except two. This restores 05b50d2b's full tree and
correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su
entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the
live infra was never applied from the broken commit.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-09 08:45:33 +00:00
parent 6d224861c4
commit fd0f4a0365
1166 changed files with 358546 additions and 0 deletions

View file

@ -0,0 +1,264 @@
#!/usr/bin/env python3
"""OpenClaw / Codex usage exporter.
Reads ~/.openclaw/agents/*/sessions/*.jsonl (assistant messages with usage)
and ~/.openclaw/agents/*/agent/auth-profiles.json (OAuth profiles), then
exposes Prometheus text-format metrics on :9099/metrics (port overridable via
METRICS_PORT). Stdlib only — no pip install needed at startup.
Metrics (all cumulative-since-session-start; use Prometheus increase()/rate()
for windowed views):
openclaw_codex_messages_total{provider,model,session_kind} counter
openclaw_codex_input_tokens_total{provider,model} counter
openclaw_codex_output_tokens_total{provider,model} counter
openclaw_codex_cache_read_tokens_total{provider,model} counter
openclaw_codex_cache_write_tokens_total{provider,model} counter
openclaw_codex_message_errors_total{provider,model,reason} counter
openclaw_codex_active_sessions{kind} gauge
openclaw_codex_oauth_expiry_seconds{provider,account,plan} gauge
openclaw_codex_last_run_timestamp gauge
openclaw_codex_exporter_scrape_duration_ms gauge
"""
import glob
import json
import os
import re
import time
from datetime import datetime
from http.server import BaseHTTPRequestHandler, HTTPServer
from threading import Lock
# Root of the OpenClaw state directory; every glob in this module is resolved
# beneath it. Overridable via the OPENCLAW_HOME environment variable.
OPENCLAW_HOME = os.environ.get("OPENCLAW_HOME", "/home/node/.openclaw")
# TCP port the HTTP server binds to (METRICS_PORT, default 9099).
PORT = int(os.environ.get("METRICS_PORT", "9099"))
# The rendered metrics text is reused for this many seconds before a rebuild.
CACHE_SEC = float(os.environ.get("CACHE_SEC", "5"))
# Session files whose basename contains any of these fragments are skipped.
SKIP_FRAGMENTS = (".broken.", ".reset.", ".deleted.", ".bak.")
# Basename of a session transcript: 36 characters of hex digits/dashes followed
# by ".jsonl"; group 1 is the session id.
SESSION_RE = re.compile(r"^([0-9a-f-]{36})\.jsonl$")
# Serialises access to _cache across request handlers.
_lock = Lock()
# Last rendered exposition text ("text") and the time.time() value at which it
# was produced ("ts").
_cache = {"text": "", "ts": 0.0}
def _esc(value: str) -> str:
    """Escape *value* for use inside a double-quoted Prometheus label value.

    The value is stringified first; backslash, double quote and newline are
    then replaced by their backslash-escaped forms.
    """
    replacements = {"\\": "\\\\", '"': '\\"', "\n": "\\n"}
    return "".join(replacements.get(ch, ch) for ch in str(value))
def _line(name: str, labels: dict, value) -> str:
    """Render a single sample line in Prometheus text exposition format.

    With no labels the result is ``"<name> <value>"``; otherwise the labels
    are emitted in sorted key order as ``<name>{k="v",...} <value>`` with each
    value passed through ``_esc``.
    """
    if not labels:
        return f"{name} {value}"
    pairs = []
    for key in sorted(labels):
        pairs.append(f'{key}="{_esc(labels[key])}"')
    return name + "{" + ",".join(pairs) + "}" + f" {value}"
def _kind_for(session_id: str, sessions_index: dict) -> str:
    """Classify the session *session_id* using the merged sessions.json index.

    The first index entry whose ``sessionId`` equals *session_id* decides the
    result:

    * key prefix ``agent:main:cron:``  -> ``"cron"``
    * key prefix ``telegram:slash:``   -> ``"telegram-slash"``
    * key prefix ``agent:main:``       -> ``"main"``
    * otherwise the entry's ``origin.surface`` if truthy, else the part of the
      key before the first ``":"``.

    Returns ``"unknown"`` when no entry matches. Entries (or ``origin`` values)
    that are not JSON objects are ignored instead of raising, so a single
    malformed entry cannot abort the scrape.
    """
    # Order matters: "agent:main:cron:" must be tested before "agent:main:".
    prefix_kinds = (
        ("agent:main:cron:", "cron"),
        ("telegram:slash:", "telegram-slash"),
        ("agent:main:", "main"),
    )
    for key, val in sessions_index.items():
        if not isinstance(val, dict) or val.get("sessionId") != session_id:
            continue
        for prefix, kind in prefix_kinds:
            if key.startswith(prefix):
                return kind
        origin = val.get("origin")
        surface = origin.get("surface") if isinstance(origin, dict) else None
        if surface:
            return surface
        return key.split(":", 1)[0]
    return "unknown"
def _parse_ts(value):
    """Convert a message timestamp to a float Unix timestamp.

    Strings are parsed with ``datetime.fromisoformat`` after rewriting ``Z``
    to ``+00:00``; ints and floats are returned as floats unchanged. Anything
    else, or a string that fails to parse, yields ``0.0``.
    """
    if isinstance(value, str):
        normalized = value.replace("Z", "+00:00")
        try:
            return datetime.fromisoformat(normalized).timestamp()
        except ValueError:
            return 0.0
    if isinstance(value, (int, float)):
        return float(value)
    return 0.0
def _read_json(path: str):
    """Return the JSON document stored at *path*, or None if unreadable/invalid."""
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # ValueError covers both JSON syntax errors and undecodable bytes.
        return None


def _token_count(value):
    """Return *value* when it is a real number, otherwise 0."""
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return 0
    return value


def _session_key_kind(key: str) -> str:
    """Map a sessions.json key to its kind label using the key prefix only."""
    if key.startswith("agent:main:cron:"):
        return "cron"
    if key.startswith("telegram:slash:"):
        return "telegram-slash"
    if key.startswith("agent:main:"):
        return "main"
    return key.split(":", 1)[0]


def _assistant_message(line: str):
    """Parse one JSONL line into ``(message, timestamp)`` or None.

    None is returned for blank lines, invalid JSON, and any record that is not
    a ``type == "message"`` object carrying an assistant-role message object.
    """
    line = line.strip()
    if not line:
        return None
    try:
        obj = json.loads(line)
    except ValueError:
        return None
    if not isinstance(obj, dict) or obj.get("type") != "message":
        return None
    msg = obj.get("message")
    if not isinstance(msg, dict) or msg.get("role") != "assistant":
        return None
    try:
        ts = _parse_ts(obj.get("timestamp"))
    except (OverflowError, OSError):
        # An out-of-range date must not discard the record or the scrape.
        ts = 0.0
    return msg, ts


def _build_text() -> str:
    """Scan the OpenClaw state directory and render all metrics as text.

    Steps, in output order:

    1. Merge every ``agents/*/sessions/sessions.json`` into one index.
    2. Walk every session ``*.jsonl`` (skipping SKIP_FRAGMENTS names and names
       not matching SESSION_RE) and accumulate message, token and error
       counters from assistant messages. Messages with ``stopReason ==
       "error"`` count only towards the error counter.
    3. Count index entries per kind for the active-sessions gauge.
    4. Emit the newest assistant-message timestamp, if any was seen.
    5. Emit seconds-until-expiry for every profile in
       ``agents/*/agent/auth-profiles.json`` that has a numeric ``expires``.
    6. Emit how long this function took.

    Reading is best effort: unreadable files and malformed records are skipped
    individually so that one bad record neither drops the rest of its file nor
    fails the whole scrape.

    Returns:
        The exposition text, newline-terminated.
    """
    start = time.monotonic()
    out = []

    sessions_index: dict = {}
    for sp in glob.glob(os.path.join(OPENCLAW_HOME, "agents/*/sessions/sessions.json")):
        doc = _read_json(sp)
        if isinstance(doc, dict):
            sessions_index.update(doc)

    msg_count: dict = {}
    in_tok: dict = {}
    out_tok: dict = {}
    cr_tok: dict = {}
    cw_tok: dict = {}
    err_count: dict = {}
    # usage field name -> accumulator keyed by (provider, model)
    token_totals = (
        ("input", in_tok),
        ("output", out_tok),
        ("cacheRead", cr_tok),
        ("cacheWrite", cw_tok),
    )
    latest_ts = 0.0

    for jsonl in glob.glob(os.path.join(OPENCLAW_HOME, "agents/*/sessions/*.jsonl")):
        bn = os.path.basename(jsonl)
        if any(s in bn for s in SKIP_FRAGMENTS):
            continue
        m = SESSION_RE.match(bn)
        if not m:
            continue
        # str() keeps the value usable as part of a dict key whatever
        # sessions.json stored under origin.surface.
        kind = str(_kind_for(m.group(1), sessions_index))
        try:
            # errors="replace": a stray invalid byte should cost at most the
            # line it sits on, not the remainder of the file.
            with open(jsonl, encoding="utf-8", errors="replace") as f:
                for line in f:
                    parsed = _assistant_message(line)
                    if parsed is None:
                        continue
                    msg, ts = parsed
                    provider = str(msg.get("provider") or "unknown")
                    model = str(msg.get("model") or "unknown")
                    if ts > latest_ts:
                        latest_ts = ts
                    if msg.get("stopReason") == "error":
                        # Truncated to bound the size of the label value.
                        reason = str(msg.get("errorMessage") or "unknown")[:80]
                        ek = (provider, model, reason)
                        err_count[ek] = err_count.get(ek, 0) + 1
                        continue
                    mk = (provider, model, kind)
                    msg_count[mk] = msg_count.get(mk, 0) + 1
                    usage = msg.get("usage")
                    if not isinstance(usage, dict):
                        usage = {}
                    pm = (provider, model)
                    for field, totals in token_totals:
                        totals[pm] = totals.get(pm, 0) + _token_count(usage.get(field))
        except OSError:
            # File vanished or became unreadable mid-scan; keep what was
            # counted so far and move on.
            continue

    out.append("# HELP openclaw_codex_messages_total Cumulative assistant messages")
    out.append("# TYPE openclaw_codex_messages_total counter")
    for (p, mdl, k), c in msg_count.items():
        out.append(_line("openclaw_codex_messages_total",
                         {"provider": p, "model": mdl, "session_kind": k}, c))

    for name, src, hlp in [
        ("openclaw_codex_input_tokens_total", in_tok, "Cumulative input tokens"),
        ("openclaw_codex_output_tokens_total", out_tok, "Cumulative output tokens"),
        ("openclaw_codex_cache_read_tokens_total", cr_tok, "Cumulative cache-read tokens"),
        ("openclaw_codex_cache_write_tokens_total", cw_tok, "Cumulative cache-write tokens"),
    ]:
        out.append(f"# HELP {name} {hlp}")
        out.append(f"# TYPE {name} counter")
        for (p, mdl), c in src.items():
            out.append(_line(name, {"provider": p, "model": mdl}, c))

    out.append("# HELP openclaw_codex_message_errors_total Cumulative assistant errors")
    out.append("# TYPE openclaw_codex_message_errors_total counter")
    for (p, mdl, r), c in err_count.items():
        out.append(_line("openclaw_codex_message_errors_total",
                         {"provider": p, "model": mdl, "reason": r}, c))

    out.append("# HELP openclaw_codex_active_sessions Active sessions in sessions.json")
    out.append("# TYPE openclaw_codex_active_sessions gauge")
    kc: dict = {}
    for k in sessions_index:
        kk = _session_key_kind(k)
        kc[kk] = kc.get(kk, 0) + 1
    for k, c in kc.items():
        out.append(_line("openclaw_codex_active_sessions", {"kind": k}, c))

    if latest_ts:
        out.append("# HELP openclaw_codex_last_run_timestamp Unix ts of newest assistant message")
        out.append("# TYPE openclaw_codex_last_run_timestamp gauge")
        out.append(_line("openclaw_codex_last_run_timestamp", {}, latest_ts))

    out.append("# HELP openclaw_codex_oauth_expiry_seconds Seconds until OAuth token expires")
    out.append("# TYPE openclaw_codex_oauth_expiry_seconds gauge")
    now = time.time()
    for af in glob.glob(os.path.join(OPENCLAW_HOME, "agents/*/agent/auth-profiles.json")):
        data = _read_json(af)
        if not isinstance(data, dict):
            continue
        # Schema: {"version": 1, "profiles": {"<id>": {...}}}.
        # `expires` is Unix milliseconds.
        profiles = data.get("profiles")
        if not isinstance(profiles, dict):
            continue
        for profile in profiles.values():
            if not isinstance(profile, dict):
                continue
            exp_ms = profile.get("expires")
            if not isinstance(exp_ms, (int, float)):
                continue
            exp_ts = exp_ms / 1000.0
            out.append(_line(
                "openclaw_codex_oauth_expiry_seconds",
                {
                    "provider": profile.get("provider", "unknown"),
                    "account": profile.get("email") or profile.get("account") or "unknown",
                    "plan": profile.get("chatgptPlanType") or "unknown",
                },
                # Already-expired tokens are reported as 0, never negative.
                max(0, exp_ts - now),
            ))

    out.append("# HELP openclaw_codex_exporter_scrape_duration_ms Last scrape duration ms")
    out.append("# TYPE openclaw_codex_exporter_scrape_duration_ms gauge")
    out.append(_line("openclaw_codex_exporter_scrape_duration_ms", {},
                     (time.monotonic() - start) * 1000))
    return "\n".join(out) + "\n"
class Handler(BaseHTTPRequestHandler):
    """HTTP handler exposing ``/healthz`` and the cached ``/metrics`` text."""

    def do_GET(self):
        """Serve ``/healthz`` (plain ``ok``), ``/metrics``, or a 404.

        The metrics text is rebuilt at most once per CACHE_SEC seconds; within
        that window the cached text is returned. If building fails, a minimal
        payload holding an error sample and a one-line comment describing the
        exception is cached and served instead, still with status 200.
        """
        # self.path is the raw request target, query string included; route on
        # the path component only so "/metrics?x=1" is not answered with 404.
        route = self.path.split("?", 1)[0]
        if route == "/healthz":
            self.send_response(200)
            self.send_header("Content-Type", "text/plain")
            self.end_headers()
            self.wfile.write(b"ok\n")
            return
        if route != "/metrics":
            self.send_response(404)
            self.end_headers()
            return
        with _lock:
            now = time.time()
            if now - _cache["ts"] > CACHE_SEC:
                try:
                    _cache["text"] = _build_text()
                except Exception as exc:  # noqa: BLE001
                    # _esc folds newlines so the message stays on one comment
                    # line of the exposition output.
                    _cache["text"] = (
                        f'openclaw_codex_exporter_errors_total{{kind="scrape"}} 1\n'
                        f'# scrape error: {_esc(str(exc))[:200]}\n'
                    )
                _cache["ts"] = now
            body = _cache["text"].encode()
        self.send_response(200)
        self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, *args, **kwargs):
        """Suppress the base class's per-request logging."""
        pass
def main():
    """Announce the listening port on stdout, then serve requests forever."""
    bind_address = ("0.0.0.0", PORT)
    print(f"openclaw exporter listening on :{PORT}", flush=True)
    server = HTTPServer(bind_address, Handler)
    server.serve_forever()


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,90 @@
"""claude-memory → OpenClaw memory-core sync.
Pulls memories from the central claude-memory REST API, writes per-category
Markdown files into /workspace/memory/projects/claude-memory-sync/
which memory-core picks up via its QMD backend.
Runs inside the openclaw pod (piped via `kubectl exec -i -- python3 -`).
Uses MEMORY_API_URL + MEMORY_API_KEY env vars already set on the pod.
Filters out is_sensitive=true memories. Also one-shot deletes the stale
metaclaw-export.json from a prior export attempt.
"""
import json
import os
import pathlib
import sys
import time
import urllib.request
def _memory_section(m: dict) -> list[str]:
    """Render one memory as the Markdown lines of its ``##`` section.

    The heading is ``## #<id> — <title>``, where the title is the first line
    of the content with leading ``#``/space characters removed and capped at
    120 characters; without a usable title the heading is just ``## #<id>``.
    A missing id is shown as ``?`` and a missing or non-numeric importance
    falls back to 0.5.
    """
    mem_id = m.get("id", "?")
    content = m.get("content") or ""
    if not isinstance(content, str):
        content = str(content)
    first_line = content.splitlines()[0] if content else ""
    title = first_line.lstrip("# ").strip()[:120]
    heading = f"## #{mem_id} — {title}" if title else f"## #{mem_id}"
    try:
        importance = float(m.get("importance", 0.5))
    except (TypeError, ValueError):
        # Covers an explicit null as well as unparseable values.
        importance = 0.5
    return [
        heading,
        "",
        f"- Tags: `{m.get('tags', '')}`",
        f"- Importance: {importance:.2f}",
        f"- Created: {m.get('created_at', '?')}",
        f"- Updated: {m.get('updated_at', '?')}",
        "",
        content,
        "",
        "---",
        "",
    ]


def main() -> int:
    """Fetch memories from the API and write one Markdown file per category.

    Reads MEMORY_API_URL and MEMORY_API_KEY from the environment, requests
    ``/api/memories?limit=10000``, drops memories flagged ``is_sensitive``,
    groups the rest by ``category`` (``"uncategorized"`` when empty) and writes
    ``<category>.md`` under ``/workspace/memory/projects/claude-memory-sync``.
    Finally removes the stale ``metaclaw-export.json`` if it is still present.

    Returns:
        0 on success.

    Raises:
        KeyError: if either environment variable is unset.
        urllib.error.URLError: if the API request fails.
    """
    api_url = os.environ["MEMORY_API_URL"].rstrip("/")
    api_key = os.environ["MEMORY_API_KEY"]
    req = urllib.request.Request(
        f"{api_url}/api/memories?limit=10000",
        headers={"Authorization": f"Bearer {api_key}"},
    )
    with urllib.request.urlopen(req, timeout=30) as r:
        data = json.load(r)

    raw = data.get("memories", [])
    mems = [m for m in raw if not m.get("is_sensitive", False)]
    sensitive_count = len(raw) - len(mems)

    by_cat: dict[str, list[dict]] = {}
    for m in mems:
        # str() so that title() and the filename below work for any JSON type.
        by_cat.setdefault(str(m.get("category") or "uncategorized"), []).append(m)

    # Write under /workspace/memory/ — memory-core's QMD backend auto-indexes
    # this path on every reindex. /home/node/.openclaw/memory/ is the
    # SQLite index location, not a content source.
    out_dir = pathlib.Path("/workspace/memory/projects/claude-memory-sync")
    out_dir.mkdir(parents=True, exist_ok=True)
    stamp = time.strftime("%Y-%m-%d %H:%M UTC", time.gmtime())

    for cat, items in sorted(by_cat.items()):
        items.sort(key=lambda x: x.get("id", 0))
        lines = [
            f"# {cat.title()} memories",
            "",
            f"_Synced from claude-memory at {stamp}. {len(items)} memories._",
            "",
        ]
        for m in items:
            lines.extend(_memory_section(m))
        # The category comes from the API; strip path separators so the file
        # always lands directly inside out_dir.
        safe_cat = cat.replace("/", "_").replace("\\", "_")
        (out_dir / f"{safe_cat}.md").write_text("\n".join(lines), encoding="utf-8")

    # One-shot: nuke the stale 2026-02-28 export sitting next to memory-core.
    stale = pathlib.Path("/home/node/.openclaw/memory/metaclaw-export.json")
    if stale.exists():
        stale.unlink()
        print("[sync] deleted stale metaclaw-export.json")

    total = sum(len(v) for v in by_cat.values())
    print(
        f"[sync] wrote {total} memories across {len(by_cat)} categories to "
        f"{out_dir} (skipped {sensitive_count} sensitive)"
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())

View file

@ -0,0 +1,184 @@
#!/usr/bin/env bash
# openclaw-task — manage long-running tmux tasks on devvm
#
# Canonical source: infra/stacks/openclaw/files/openclaw-task.sh
# Installed to /usr/local/bin/openclaw-task on devvm so non-interactive
# SSH (e.g. `ssh devvm openclaw-task list`) finds it on the default PATH.
#
# Sessions are prefixed `openclaw-task-` to avoid colliding with the
# user's own tmux work. Persistent transcripts live in
# ~/openclaw-tasks/<id>.log via `tmux pipe-pane`. Sessions and logs
# survive OpenClaw pod restarts (they live on devvm, not in the pod).
# Exit on any command failure (-e), on use of an unset variable (-u), and
# treat a pipeline as failed if any stage fails (pipefail).
set -euo pipefail
# Use full paths because non-interactive SSH does not source ~/.profile
# or ~/.bashrc (see memory id=740).
TMUX_BIN=/usr/bin/tmux
CLAUDE_BIN=/usr/local/bin/claude # installed as symlink to /home/wizard/.local/bin/claude
# Prefix that marks a tmux session as belonging to this tool; cmd_list
# filters on it and strips it again.
PREFIX=openclaw-task-
# Directory holding one <id>.log transcript per task; override with
# OPENCLAW_TASK_DIR.
TASK_DIR=${OPENCLAW_TASK_DIR:-$HOME/openclaw-tasks}
# Runs on every invocation, so the directory exists before any subcommand.
mkdir -p "$TASK_DIR"
# die <message...> — print "openclaw-task: <message>" to stderr and exit 1.
die() {
  printf 'openclaw-task: %s\n' "$*" >&2
  exit 1
}
# session_name <id> — print the tmux session name for task <id>.
#
# Rejects ids that cannot round-trip: an empty id, or one containing "/"
# (would escape $TASK_DIR in the log path) or "." / ":" (tmux treats both as
# separators when parsing a -t target, so the session could never be addressed
# again). Callers capture the output with `name=$(session_name "$id")`, so a
# rejection aborts the script through `set -e`.
session_name() {
  local id="$1"
  case "$id" in
    ''|*[/.:]*)
      die "invalid task id '$id' (must be non-empty and must not contain '/', '.' or ':')"
      ;;
  esac
  printf '%s%s' "$PREFIX" "$id"
}
# require_session <session-name> — die unless that exact tmux session exists.
require_session() {
  local name="$1"
  # The "=" prefix makes tmux accept only an exact name match. A bare name
  # also matches by prefix, so "…-build" would be satisfied by "…-build-foo".
  if ! "$TMUX_BIN" has-session -t "=$name" 2>/dev/null; then
    die "no session '$name' (use 'openclaw-task list')"
  fi
}
# usage — print the command reference and examples to stdout.
usage() {
# The here-document delimiter is unquoted, so the shell would expand any
# "$" in the text below; the text currently contains none.
cat <<EOF
openclaw-task — manage long-running tmux tasks on devvm
USAGE
openclaw-task new <id> <command...> spawn detached tmux session
openclaw-task claude <id> [prompt...] spawn interactive claude in a session;
if prompt given, send-keys it + Enter
openclaw-task send <id> <keys...> tmux send-keys passthrough (you must
pass 'Enter' literal for newline)
openclaw-task capture <id> [lines] last <lines> of pane (default 1000)
openclaw-task log <id> cat the persistent pipe-pane log
openclaw-task tail <id> tail -f the persistent log
openclaw-task list all openclaw task ids (one per line)
openclaw-task status <id> 'running' or 'ended'
openclaw-task kill <id> kill session (log file kept)
openclaw-task purge <id> kill + delete log file
EXAMPLES
openclaw-task new build-foo "cd ~/code/foo && make all 2>&1"
openclaw-task claude diag-frigate
openclaw-task send diag-frigate "investigate gpu crashloop" Enter
openclaw-task capture diag-frigate 200
openclaw-task list
EOF
}
# cmd_new <id> <command...> — run <command> in a new detached tmux session
# whose pane output is appended to $TASK_DIR/<id>.log. Any existing log for
# <id> is truncated first. Prints the session name and log path.
cmd_new() {
  [ $# -lt 2 ] && die "usage: openclaw-task new <id> <command...>"
  local id="$1"; shift
  local name; name=$(session_name "$id")
  # "=" = exact match only; a bare name would also match any session whose
  # name merely starts with it and wrongly report "already exists".
  if "$TMUX_BIN" has-session -t "=$name" 2>/dev/null; then
    die "session '$name' already exists"
  fi
  local log="$TASK_DIR/$id.log"
  : > "$log"
  # Start an idle interactive bash so pipe-pane can attach BEFORE the
  # user's command runs. If we passed the command directly to
  # new-session, its first lines beat pipe-pane to the pane and never
  # land in the log.
  "$TMUX_BIN" new-session -d -s "$name" bash --norc -i
  "$TMUX_BIN" pipe-pane -o -t "=$name:" "cat >> '$log'"
  sleep 0.2
  # -l sends the text literally (no key-name lookup, so a command such as
  # "Enter" or "C-c" is typed, not pressed) and "--" stops option parsing for
  # commands that start with "-". Enter is sent separately as a real key.
  "$TMUX_BIN" send-keys -t "=$name:" -l -- "$*"
  "$TMUX_BIN" send-keys -t "=$name:" Enter
  # Auto-exit propagating the command's status so the tmux session
  # ends when the command does.
  "$TMUX_BIN" send-keys -t "=$name:" -l -- 'exit $?'
  "$TMUX_BIN" send-keys -t "=$name:" Enter
  printf 'session: %s\nlog: %s\n' "$name" "$log"
}
# cmd_claude <id> [prompt...] — start $CLAUDE_BIN in a new detached tmux
# session logged to $TASK_DIR/<id>.log (truncated first). If a prompt is
# given it is typed into the session followed by Enter. Prints the session
# name and log path.
cmd_claude() {
  [ $# -lt 1 ] && die "usage: openclaw-task claude <id> [prompt...]"
  local id="$1"; shift
  local name; name=$(session_name "$id")
  # "=" = exact match only; a bare name also matches sessions by prefix.
  if "$TMUX_BIN" has-session -t "=$name" 2>/dev/null; then
    die "session '$name' already exists (use 'send' to add prompts)"
  fi
  local log="$TASK_DIR/$id.log"
  : > "$log"
  # sleep+exec lets pipe-pane attach before claude prints its banner.
  "$TMUX_BIN" new-session -d -s "$name" bash -c "sleep 0.3; exec '$CLAUDE_BIN'"
  "$TMUX_BIN" pipe-pane -o -t "=$name:" "cat >> '$log'"
  if [ $# -gt 0 ]; then
    # Wait for claude to come up before sending the prompt
    sleep 2
    # -l types the prompt literally even if it equals a tmux key name, and
    # "--" keeps a prompt starting with "-" from being read as an option.
    "$TMUX_BIN" send-keys -t "=$name:" -l -- "$*"
    "$TMUX_BIN" send-keys -t "=$name:" Enter
  fi
  printf 'session: %s\nlog: %s\n' "$name" "$log"
}
# cmd_send <id> <keys...> — pass <keys...> unchanged to `tmux send-keys` for
# the task's session. Key names such as Enter are interpreted by tmux.
cmd_send() {
  [ $# -lt 2 ] && die "usage: openclaw-task send <id> <keys...>"
  local id="$1"; shift
  local name; name=$(session_name "$id")
  require_session "$name"
  # "=name:" pins the target to exactly this session (its current window and
  # active pane) instead of letting tmux fall back to prefix matching.
  "$TMUX_BIN" send-keys -t "=$name:" "$@"
}
# cmd_capture <id> [lines] — print the last <lines> lines (default 1000) of
# the task's pane, including scrollback.
cmd_capture() {
  [ $# -lt 1 ] && die "usage: openclaw-task capture <id> [lines]"
  local id="$1"
  local lines="${2:-1000}"
  # Fail with a clear message instead of handing tmux a malformed -S value.
  case "$lines" in
    ''|*[!0-9]*) die "lines must be a non-negative integer (got '$lines')" ;;
  esac
  local name; name=$(session_name "$id")
  require_session "$name"
  # "=name:" = exactly this session; -S -N starts N lines back in history.
  "$TMUX_BIN" capture-pane -t "=$name:" -p -S "-$lines"
}
# cmd_log <id> — print the whole persistent log of task <id>.
cmd_log() {
  if [ $# -lt 1 ]; then
    die "usage: openclaw-task log <id>"
  fi
  local task_id="$1"
  local logfile="$TASK_DIR/$task_id.log"
  if [ ! -f "$logfile" ]; then
    die "no log file for '$task_id' (looked at $logfile)"
  fi
  cat "$logfile"
}
# cmd_tail <id> — show the last 100 lines of the task's log and keep
# following it.
cmd_tail() {
  if [ $# -lt 1 ]; then
    die "usage: openclaw-task tail <id>"
  fi
  local task_id="$1"
  local logfile="$TASK_DIR/$task_id.log"
  if [ ! -f "$logfile" ]; then
    die "no log file for '$task_id' (looked at $logfile)"
  fi
  tail -n 100 -f "$logfile"
}
# cmd_list — print the id (session name minus $PREFIX) of every tmux session
# whose name starts with $PREFIX, one per line. Prints nothing and still
# succeeds when there are none or no tmux server is running.
cmd_list() {
  # sed -n 's…p' selects the prefixed names and strips the prefix in one
  # step; "|| true" absorbs the failure when list-sessions has nothing.
  "$TMUX_BIN" list-sessions -F '#{session_name}' 2>/dev/null \
    | sed -n "s|^${PREFIX}||p" \
    || true
}
# cmd_status <id> — print "running" if the task's tmux session exists,
# otherwise "ended".
cmd_status() {
  [ $# -lt 1 ] && die "usage: openclaw-task status <id>"
  local id="$1"
  local name; name=$(session_name "$id")
  # "=" = exact match only. Without it tmux also matches by prefix, so task
  # "build" would be reported as running while only "build-foo" exists.
  if "$TMUX_BIN" has-session -t "=$name" 2>/dev/null; then
    echo running
  else
    echo ended
  fi
}
# cmd_kill <id> — kill the task's tmux session; the log file is left in place.
cmd_kill() {
  [ $# -lt 1 ] && die "usage: openclaw-task kill <id>"
  local id="$1"
  local name; name=$(session_name "$id")
  require_session "$name"
  # "=" = exact match only, so killing "build" can never take down
  # "build-foo" through tmux's prefix matching.
  "$TMUX_BIN" kill-session -t "=$name"
}
# cmd_purge <id> — kill the task's tmux session if it exists and delete its
# log file. Succeeds even when neither exists.
cmd_purge() {
  [ $# -lt 1 ] && die "usage: openclaw-task purge <id>"
  local id="$1"
  local name; name=$(session_name "$id")
  # "=" = exact match only; a prefix match here would kill another task.
  "$TMUX_BIN" kill-session -t "=$name" 2>/dev/null || true
  # "--" so the path is never parsed as an rm option.
  rm -f -- "$TASK_DIR/$id.log"
  echo "purged: $id"
}
# Dispatch on the first argument (default: help). Known subcommands are
# forwarded to the matching cmd_<name> function with the remaining
# arguments; anything unrecognised prints usage and exits 2.
subcommand="${1:-help}"
case "$subcommand" in
  new|claude|send|capture|log|tail|list|status|kill|purge)
    shift
    "cmd_$subcommand" "$@"
    ;;
  help|-h|--help)
    usage
    ;;
  *)
    usage
    exit 2
    ;;
esac