openclaw: SSH + tmux task fallback to devvm

Give the OpenClaw pod two new capabilities:

1. Host-tools bundle. New init container `install-host-tools` extracts
   openssh-client + dnsutils + tmux + jq + ripgrep + fd + vault + yq +
   friends into /tools/host-tools/, with the bookworm-slim libs the
   binaries need. PATH + LD_LIBRARY_PATH on the main container point
   ld.so at the bundle. Idempotent via /tools/host-tools/.installed-v1
   marker; smoke test (ldd-based) fails the init at deploy time if any
   binary has unresolved deps. Bundle is ~558 MB on the existing
   /srv/nfs/openclaw/tools NFS.

2. devvm SSH + async task pattern. New init `setup-ssh-config` writes
   id_rsa/config/known_hosts under /home/node/.openclaw/.ssh; main
   container startup symlinks /home/node/.ssh → there. New
   /usr/local/bin/openclaw-task wrapper on devvm manages long-running
   work as tmux sessions on devvm (sessions and logs survive pod
   restarts — they live on devvm, not in the pod). New init container
   `seed-devvm-memory-note` drops a markdown note teaching the pattern;
   main container startup now runs `openclaw memory index --force` so
   the note is searchable on first boot.

Design + verified E2E flow in
docs/plans/2026-05-22-openclaw-devvm-access-design.md. Persistence test
green: spawned a 50s task from pod A, deleted pod A, new pod B saw the
task finish and read its full log.

Pre-existing keel.sh annotation drift on openclaw/{openlobster,
task_webhook} cleaned up in the same apply.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-22 10:20:00 +00:00
parent c7b0ebf6a5
commit 9ad52dfd61
3 changed files with 694 additions and 2 deletions

View file

@ -0,0 +1,184 @@
#!/usr/bin/env bash
# openclaw-task — manage long-running tmux tasks on devvm
#
# Canonical source: infra/stacks/openclaw/files/openclaw-task.sh
# Installed to /usr/local/bin/openclaw-task on devvm so non-interactive
# SSH (e.g. `ssh devvm openclaw-task list`) finds it on the default PATH.
#
# Sessions are prefixed `openclaw-task-` to avoid colliding with the
# user's own tmux work. Persistent transcripts live in
# ~/openclaw-tasks/<id>.log via `tmux pipe-pane`. Sessions and logs
# survive OpenClaw pod restarts (they live on devvm, not in the pod).
# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -euo pipefail

# Binaries are referenced by absolute path because non-interactive SSH
# does not source ~/.profile or ~/.bashrc (see memory id=740), so PATH
# additions made there are not available.
TMUX_BIN="/usr/bin/tmux"
CLAUDE_BIN="/usr/local/bin/claude" # installed as symlink to /home/wizard/.local/bin/claude

# Every session this tool owns carries this name prefix.
PREFIX="openclaw-task-"

# Directory holding the per-task logs; overridable via OPENCLAW_TASK_DIR.
TASK_DIR="${OPENCLAW_TASK_DIR:-${HOME}/openclaw-tasks}"
mkdir -p "${TASK_DIR}"
# Report a fatal error on stderr, prefixed with the tool name, then exit 1.
# Arguments: $* - message words (joined with single spaces).
die() {
  printf 'openclaw-task: %s\n' "$*" >&2
  exit 1
}
# Map a task id to its tmux session name.
# Globals:   PREFIX (read) - session name prefix; falls back to
#            'openclaw-task-' when unset so the mapping stays in step with
#            whatever prefix the rest of the script filters on.
# Arguments: $1 - task id
# Outputs:   the session name on stdout (no trailing newline)
session_name() { printf '%s%s' "${PREFIX:-openclaw-task-}" "$1"; }
# Abort (via die) unless a tmux session with exactly this name exists.
# Globals:   TMUX_BIN (read)
# Arguments: $1 - full tmux session name (already prefixed)
require_session() {
  local name="$1"
  # The '=' prefix forces an exact match. Without it tmux falls back to
  # prefix/fnmatch matching, so 'openclaw-task-foo' would be "found" when
  # only 'openclaw-task-foobar' exists.
  "$TMUX_BIN" has-session -t "=$name" 2>/dev/null \
    || die "no session '$name' (use 'openclaw-task list')"
}
# Print the command reference and examples to stdout.
# The text is static, so the here-document delimiter is quoted to make it
# explicit that nothing inside is expanded.
usage() {
  cat <<'USAGE_TEXT'
openclaw-task — manage long-running tmux tasks on devvm
USAGE
openclaw-task new <id> <command...> spawn detached tmux session
openclaw-task claude <id> [prompt...] spawn interactive claude in a session;
if prompt given, send-keys it + Enter
openclaw-task send <id> <keys...> tmux send-keys passthrough (you must
pass 'Enter' literal for newline)
openclaw-task capture <id> [lines] last <lines> of pane (default 1000)
openclaw-task log <id> cat the persistent pipe-pane log
openclaw-task tail <id> tail -f the persistent log
openclaw-task list all openclaw task ids (one per line)
openclaw-task status <id> 'running' or 'ended'
openclaw-task kill <id> kill session (log file kept)
openclaw-task purge <id> kill + delete log file
EXAMPLES
openclaw-task new build-foo "cd ~/code/foo && make all 2>&1"
openclaw-task claude diag-frigate
openclaw-task send diag-frigate "investigate gpu crashloop" Enter
openclaw-task capture diag-frigate 200
openclaw-task list
USAGE_TEXT
}
# Spawn a detached tmux session that runs a command and logs its output.
# Globals:   TMUX_BIN, TASK_DIR (read)
# Arguments: $1 - task id; $2.. - command words (joined with spaces and
#            typed into an interactive bash inside the session)
# Outputs:   'session: <name>' and 'log: <path>' on stdout
# Exits:     1 (via die) on bad usage, invalid id, or existing session
cmd_new() {
  [ $# -lt 2 ] && die "usage: openclaw-task new <id> <command...>"
  local id="$1"; shift
  # '/' would let the log path escape TASK_DIR; ':' and '.' are tmux's
  # window/pane separators in targets, so a session created with them in
  # its name could not be addressed again by the other subcommands.
  case "$id" in
    ''|*[/.:]*) die "invalid id '$id' (must be non-empty and free of '/', '.' and ':')" ;;
  esac
  local name; name=$(session_name "$id")
  # '=' forces an exact session match; a bare name also matches any
  # session that merely starts with it (e.g. 'foo' vs 'foobar').
  if "$TMUX_BIN" has-session -t "=$name" 2>/dev/null; then
    die "session '$name' already exists"
  fi
  local log="$TASK_DIR/$id.log"
  : > "$log"
  # Start an idle interactive bash so pipe-pane can attach BEFORE the
  # user's command runs. If we passed the command directly to
  # new-session, its first lines beat pipe-pane to the pane and never
  # land in the log.
  "$TMUX_BIN" new-session -d -s "$name" bash --norc -i
  # '=<name>:' = exactly this session, its current window and active pane.
  "$TMUX_BIN" pipe-pane -o -t "=$name:" "cat >> '$log'"
  sleep 0.2
  "$TMUX_BIN" send-keys -t "=$name:" "$*" Enter
  # Auto-exit propagating the command's status so the tmux session
  # ends when the command does.
  "$TMUX_BIN" send-keys -t "=$name:" 'exit $?' Enter
  printf 'session: %s\nlog: %s\n' "$name" "$log"
}
# Spawn an interactive claude inside a new detached tmux session.
# Globals:   TMUX_BIN, CLAUDE_BIN, TASK_DIR (read)
# Arguments: $1 - task id; $2.. - optional initial prompt words (joined
#            with spaces, typed into the pane followed by Enter)
# Outputs:   'session: <name>' and 'log: <path>' on stdout
# Exits:     1 (via die) on bad usage, invalid id, or existing session
cmd_claude() {
  [ $# -lt 1 ] && die "usage: openclaw-task claude <id> [prompt...]"
  local id="$1"; shift
  # '/' would let the log path escape TASK_DIR; ':' and '.' are tmux's
  # window/pane separators in targets, so a session created with them in
  # its name could not be addressed again by the other subcommands.
  case "$id" in
    ''|*[/.:]*) die "invalid id '$id' (must be non-empty and free of '/', '.' and ':')" ;;
  esac
  local name; name=$(session_name "$id")
  # '=' forces an exact session match; a bare name also matches any
  # session that merely starts with it (e.g. 'foo' vs 'foobar').
  if "$TMUX_BIN" has-session -t "=$name" 2>/dev/null; then
    die "session '$name' already exists (use 'send' to add prompts)"
  fi
  local log="$TASK_DIR/$id.log"
  : > "$log"
  # sleep+exec lets pipe-pane attach before claude prints its banner.
  "$TMUX_BIN" new-session -d -s "$name" bash -c "sleep 0.3; exec '$CLAUDE_BIN'"
  # '=<name>:' = exactly this session, its current window and active pane.
  "$TMUX_BIN" pipe-pane -o -t "=$name:" "cat >> '$log'"
  if [ $# -gt 0 ]; then
    # Wait for claude to come up before sending the prompt
    sleep 2
    "$TMUX_BIN" send-keys -t "=$name:" "$*" Enter
  fi
  printf 'session: %s\nlog: %s\n' "$name" "$log"
}
# Pass keys through to `tmux send-keys` for an existing task session.
# Globals:   TMUX_BIN (read)
# Arguments: $1 - task id; $2.. - keys, forwarded verbatim as separate
#            arguments (the caller passes a literal 'Enter' for newline)
# Exits:     1 (via die) on bad usage or when the session is missing
cmd_send() {
  [ $# -lt 2 ] && die "usage: openclaw-task send <id> <keys...>"
  local id="$1"; shift
  local name; name=$(session_name "$id")
  require_session "$name"
  # '=<name>:' pins the target to exactly this session (current window,
  # active pane) so keys can never land in a session that merely shares
  # the same name prefix.
  "$TMUX_BIN" send-keys -t "=$name:" "$@"
}
# Print the tail of a task's pane, including scrollback.
# Globals:   TMUX_BIN (read)
# Arguments: $1 - task id; $2 - number of history lines (default 1000)
# Exits:     1 (via die) on bad usage, non-numeric line count, or when
#            the session is missing
cmd_capture() {
  [ $# -lt 1 ] && die "usage: openclaw-task capture <id> [lines]"
  local id="$1"
  local lines="${2:-1000}"
  # The count is spliced into '-S -<lines>'; reject anything that is not
  # a plain non-negative integer instead of handing tmux a bogus offset.
  case "$lines" in
    *[!0-9]*) die "lines must be a non-negative integer (got '$lines')" ;;
  esac
  local name; name=$(session_name "$id")
  require_session "$name"
  # '=<name>:' pins the target to exactly this session (current window,
  # active pane) rather than any session sharing the name prefix.
  "$TMUX_BIN" capture-pane -t "=$name:" -p -S "-$lines"
}
# Dump a task's persistent log file to stdout.
# Globals:   TASK_DIR (read)
# Arguments: $1 - task id
# Exits:     1 (via die) on bad usage or when the log file is absent
cmd_log() {
  if [ $# -lt 1 ]; then
    die "usage: openclaw-task log <id>"
  fi
  local task_id="$1"
  local logfile="${TASK_DIR}/${task_id}.log"
  if [ ! -f "$logfile" ]; then
    die "no log file for '$task_id' (looked at $logfile)"
  fi
  cat "$logfile"
}
# Follow a task's persistent log, starting from its last 100 lines.
# Globals:   TASK_DIR (read)
# Arguments: $1 - task id
# Exits:     1 (via die) on bad usage or when the log file is absent
cmd_tail() {
  if [ $# -lt 1 ]; then
    die "usage: openclaw-task tail <id>"
  fi
  local task_id="$1"
  local logfile="${TASK_DIR}/${task_id}.log"
  if [ ! -f "$logfile" ]; then
    die "no log file for '$task_id' (looked at $logfile)"
  fi
  tail -n 100 -f "$logfile"
}
# List the ids of all sessions owned by this tool, one per line.
# Globals:   TMUX_BIN, PREFIX (read)
# Outputs:   task ids (session names with PREFIX stripped) on stdout
# Returns:   always 0; "no tmux server" and "no matching session" both
#            simply yield empty output
cmd_list() {
  # A single sed pass both selects the PREFIX-ed names and strips the
  # prefix; -n + p prints only the lines where the substitution applied.
  {
    "$TMUX_BIN" list-sessions -F '#{session_name}' 2>/dev/null \
      | sed -n "s|^${PREFIX}||p"
  } || true
}
# Report whether a task's tmux session still exists.
# Globals:   TMUX_BIN (read)
# Arguments: $1 - task id
# Outputs:   'running' if the session exists, otherwise 'ended'
# Exits:     1 (via die) on bad usage
cmd_status() {
  [ $# -lt 1 ] && die "usage: openclaw-task status <id>"
  local id="$1"
  local name; name=$(session_name "$id")
  # '=' forces an exact match. With a bare name tmux also accepts any
  # session that starts with it, so a finished task 'foo' would be
  # reported as running while 'foobar' is alive.
  if "$TMUX_BIN" has-session -t "=$name" 2>/dev/null; then
    echo running
  else
    echo ended
  fi
}
# Kill a task's tmux session; its log file is left in place.
# Globals:   TMUX_BIN (read)
# Arguments: $1 - task id
# Exits:     1 (via die) on bad usage or when the session is missing
cmd_kill() {
  [ $# -lt 1 ] && die "usage: openclaw-task kill <id>"
  local id="$1"
  local name; name=$(session_name "$id")
  require_session "$name"
  # '=' forces an exact match so a different session that merely shares
  # the name prefix can never be the one that gets killed.
  "$TMUX_BIN" kill-session -t "=$name"
}
# Kill a task's session (if it still exists) and delete its log file.
# Globals:   TMUX_BIN, TASK_DIR (read)
# Arguments: $1 - task id
# Outputs:   'purged: <id>' on stdout
# Exits:     1 (via die) on bad usage or an id that is empty/contains '/'
cmd_purge() {
  [ $# -lt 1 ] && die "usage: openclaw-task purge <id>"
  local id="$1"
  # The id is spliced into an rm path; refuse anything that could point
  # outside TASK_DIR (e.g. '../other').
  case "$id" in
    ''|*/*) die "invalid id '$id' (must be non-empty and must not contain '/')" ;;
  esac
  local name; name=$(session_name "$id")
  # Best effort: the session may already be gone. '=' forces an exact
  # match so a session that only shares the name prefix is never killed.
  "$TMUX_BIN" kill-session -t "=$name" 2>/dev/null || true
  rm -f -- "$TASK_DIR/$id.log"
  echo "purged: $id"
}
# Entry point: the first argument selects the subcommand and everything
# after it is forwarded unchanged. No argument at all behaves like 'help';
# an unknown subcommand prints the usage text and exits with status 2.
subcommand="${1:-help}"
if [ $# -gt 0 ]; then
  shift
fi
case "$subcommand" in
  new | claude | send | capture | log | tail | list | status | kill | purge)
    "cmd_${subcommand}" "$@"
    ;;
  help | -h | --help)
    usage
    ;;
  *)
    usage
    exit 2
    ;;
esac

View file

@ -496,6 +496,223 @@ resource "kubernetes_deployment" "openclaw" {
}
}
# Init 4: install host-tools bundle (ssh, vault, jq, ripgrep, tmux, ...)
# into /tools/host-tools/ so the in-pod agent reaches CLI parity
# with the dev VM. The upstream OpenClaw image is minimal Debian
# bookworm running as uid 1000, so it can't apt-install at runtime.
# Idempotent via marker file; bump suffix to force reinstall.
# See docs/plans/2026-05-22-openclaw-devvm-access-design.md.
init_container {
  # Builds the host-tools bundle on the "tools" volume: Debian packages are
  # downloaded and unpacked (not installed) into /tools/host-tools/root,
  # vault and yq are fetched as standalone binaries into
  # /tools/host-tools/bin. The whole script is skipped once the
  # .installed-v1 marker exists; bump the suffix to force a rebuild.
  name  = "install-host-tools"
  image = "debian:bookworm-slim"
  command = ["bash", "-c", <<-EOT
    set -euo pipefail
    DEST=/tools/host-tools
    MARKER="$DEST/.installed-v1"
    if [ -f "$MARKER" ]; then
      echo "host-tools v1 already installed (skipping)"
      exit 0
    fi
    echo "installing host-tools v1 ..."
    rm -rf "$DEST"
    mkdir -p "$DEST/root" "$DEST/bin"
    export DEBIAN_FRONTEND=noninteractive
    apt-get update -qq
    # Download + unpack the bundle FIRST, while this container is still
    # pristine. apt skips packages that are already installed, so if
    # wget/unzip/ca-certificates were installed into the init container
    # before this step, they (and the libs they pull in) would be
    # missing from the bundle.
    # NOTE: we deliberately do NOT pass --no-install-recommends to
    # the download step. ssh links against libgssapi-krb5-2 which
    # is a hard Depends but its transitive deps (libkrb5-3 etc.)
    # need to come along too. The bundle is a /usr-like tree that the
    # openclaw container uses via LD_LIBRARY_PATH, so missing
    # deps = broken binaries.
    APT_PKGS="openssh-client dnsutils iputils-ping wget gnupg jq ripgrep fd-find ncdu htop strace tcpdump tmux unzip ca-certificates"
    apt-get install -y --download-only $APT_PKGS
    for d in /var/cache/apt/archives/*.deb; do
      dpkg-deb -x "$d" "$DEST/root/"
    done
    # debian:bookworm-slim doesn't ship wget/unzip; install them
    # transiently into this init container's filesystem so we can
    # download the static binaries below.
    apt-get install -y --no-install-recommends wget unzip ca-certificates
    VAULT_VER=1.18.3
    YQ_VER=v4.44.3
    wget -qO /tmp/vault.zip \
      "https://releases.hashicorp.com/vault/$${VAULT_VER}/vault_$${VAULT_VER}_linux_amd64.zip"
    unzip -o /tmp/vault.zip vault -d "$DEST/bin/"
    chmod +x "$DEST/bin/vault"
    wget -qO "$DEST/bin/yq" \
      "https://github.com/mikefarah/yq/releases/download/$${YQ_VER}/yq_linux_amd64"
    chmod +x "$DEST/bin/yq"
    # Smoke test: fail init if any bundled binary is missing or has
    # unresolved shared-lib deps, so glibc / shared-lib drift surfaces
    # at deploy time. We don't run --version because flag support
    # varies (older scp returns non-zero, ping/nslookup use weird
    # conventions). ldd is the reliable signal: if any "not
    # found" appears, the binary won't load when called.
    # LD_LIBRARY_PATH points ld.so at the bundled libs (the
    # openclaw main container sets the same env).
    export PATH="$DEST/root/usr/bin:$DEST/root/usr/sbin:$DEST/root/bin:$DEST/root/sbin:$DEST/bin:$PATH"
    export LD_LIBRARY_PATH="$DEST/root/usr/lib/x86_64-linux-gnu:$DEST/root/lib/x86_64-linux-gnu"
    # Resolve each tool inside the bundle only. A plain PATH lookup would
    # also accept this init container's own /usr/bin (e.g. the
    # transiently installed wget), which the main container won't have.
    find_in_bundle() {
      local dir
      for dir in "$DEST/root/usr/bin" "$DEST/root/usr/sbin" "$DEST/root/bin" "$DEST/root/sbin" "$DEST/bin"; do
        if [ -f "$dir/$1" ] && [ -x "$dir/$1" ]; then
          echo "$dir/$1"
          return 0
        fi
      done
      return 1
    }
    for t in ssh scp ssh-keyscan dig host nslookup ping wget gpg jq rg fdfind tmux vault yq; do
      bin=$(find_in_bundle "$t") || { echo "FAIL: $t not found in the bundle"; exit 1; }
      if ldd "$bin" 2>&1 | grep -q "not found"; then
        echo "FAIL: $t has unresolved shared libs:"
        ldd "$bin"
        exit 1
      fi
      echo "OK: $t"
    done
    chown -R 1000:1000 "$DEST"
    touch "$MARKER"
    echo "host-tools v1 install complete ($(du -sh "$DEST" | cut -f1))"
  EOT
  ]
  volume_mount {
    name       = "tools"
    mount_path = "/tools"
  }
  resources {
    requests = { cpu = "100m", memory = "256Mi" }
    limits   = { memory = "512Mi" }
  }
}
# Init 5: write /home/node/.openclaw/.ssh/{id_rsa,config,known_hosts}
# so the agent can `ssh devvm` without device-trust prompts. The
# main container symlinks /home/node/.ssh here at startup so
# the ssh client picks it up via $HOME/.ssh. Installs
# openssh-client transiently into this init container so
# ssh-keyscan works without LD_LIBRARY_PATH gymnastics.
init_container {
# Prepares the SSH client files (id_rsa, config, known_hosts) under
# /home/node/.openclaw/.ssh on the openclaw-home volume.
name = "setup-ssh-config"
image = "debian:bookworm-slim"
# The script exits early once the .configured-v1 marker exists, so a
# changed private key or devvm host key is only picked up after the
# marker suffix is bumped (or the marker is removed).
# known_hosts is filled by ssh-keyscan at init time and the generated
# config sets StrictHostKeyChecking yes, i.e. the host key seen during
# this init run is the one the client will insist on afterwards.
# NOTE(review): the script runs under `set -e`; if ssh-keyscan itself
# exits non-zero the script stops there and the branch that prints
# /tmp/keyscan.err is never reached. Confirm ssh-keyscan's exit status
# on connection failure.
command = ["bash", "-c", <<-EOT
set -euo pipefail
SSH=/home/node/.openclaw/.ssh
MARKER="$SSH/.configured-v1"
if [ -f "$MARKER" ]; then
echo "ssh-config v1 already set up (skipping)"
exit 0
fi
echo "installing openssh-client for ssh-keyscan ..."
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y --no-install-recommends openssh-client >/dev/null
echo "configuring ssh ..."
mkdir -p "$SSH"
# Copy the secret-mounted private key into ~/.ssh with 0600
# the secret's tmpfs mount has wider perms (1777 + symlinks)
# that openssh refuses.
cp /ssh/id_rsa "$SSH/id_rsa"
chmod 0600 "$SSH/id_rsa"
cat > "$SSH/config" <<'SSH_EOF'
Host devvm
HostName 10.0.10.10
User wizard
IdentityFile ~/.ssh/id_rsa
UserKnownHostsFile ~/.ssh/known_hosts
StrictHostKeyChecking yes
SSH_EOF
chmod 0600 "$SSH/config"
ssh-keyscan -H 10.0.10.10 > "$SSH/known_hosts" 2>/tmp/keyscan.err
if [ ! -s "$SSH/known_hosts" ]; then
echo "ssh-keyscan produced empty known_hosts; stderr:"
cat /tmp/keyscan.err
exit 1
fi
chmod 0644 "$SSH/known_hosts"
chown -R 1000:1000 "$SSH"
touch "$MARKER"
echo "ssh-config v1 set up"
EOT
]
# Destination: the script writes everything below
# /home/node/.openclaw/.ssh on this volume.
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
# Source of the private key: the script reads /ssh/id_rsa from here.
volume_mount {
name = "ssh-key"
mount_path = "/ssh"
}
resources {
requests = { cpu = "50m", memory = "64Mi" }
limits = { memory = "256Mi" }
}
}
# Init 6: seed the devvm-fallback memory note into
# /workspace/memory/projects/openclaw-runtime/. The note teaches
# openclaw the SSH+tmux pattern. The main container's startup
# runs `memory index --force` so it's searchable immediately;
# the daily memory-sync CronJob also keeps it indexed afterward.
# Always rewrites the file: the configmap-baked note is canonical.
init_container {
# Writes devvm-fallback.md into
# /workspace/memory/projects/openclaw-runtime on the workspace volume.
name = "seed-devvm-memory-note"
image = "busybox:1.37"
# There is no marker check here: the note is overwritten on every pod
# start, so edits made to the file inside the workspace are lost and the
# text below is the single source of truth. The directory is chowned to
# 1000:1000 after writing.
command = ["sh", "-c", <<-EOT
set -eu
DIR=/workspace/memory/projects/openclaw-runtime
mkdir -p "$DIR"
cat > "$DIR/devvm-fallback.md" <<'NOTE_EOF'
# Using devvm as a fallback
When in-pod tools/permissions block you, SSH to devvm and use it.
Devvm runs as wizard with full home-lab access (Vault, kubectl,
git repos, Cloudflare, etc.) and Claude Code v2+ is installed.
## One-shot lookup
ssh devvm 'vault kv get -field=brave_api_key secret/openclaw'
ssh devvm 'claude -p "investigate why frigate is restarting"'
## Long-running async work USE THIS for anything > ~2 min
Spawn in a tmux session on devvm. Sessions and logs survive
OpenClaw pod restarts (they live on devvm, not in this pod).
# spawn
ssh devvm openclaw-task new my-task "claude -p --dangerously-skip-permissions 'do the thing'"
# poll progress (last 1000 lines of pane)
ssh devvm openclaw-task capture my-task
# interactive claude (send follow-up prompts)
ssh devvm openclaw-task claude my-task "initial prompt"
ssh devvm openclaw-task send my-task "follow-up prompt" Enter
# housekeeping
ssh devvm openclaw-task list
ssh devvm openclaw-task status my-task
ssh devvm openclaw-task kill my-task
Logs persist at ~/openclaw-tasks/<id>.log on devvm even after a
session is killed. Use `ssh devvm openclaw-task log <id>` to
retrieve them.
NOTE_EOF
chown -R 1000:1000 "$DIR"
echo "memory note seeded at $DIR/devvm-fallback.md"
EOT
]
# Volume that receives the note (mounted at /workspace).
volume_mount {
name = "workspace"
mount_path = "/workspace"
}
resources {
requests = { cpu = "10m", memory = "32Mi" }
limits = { memory = "32Mi" }
}
}
# Main container: OpenClaw
container {
name = "openclaw"
@ -511,6 +728,11 @@ resource "kubernetes_deployment" "openclaw" {
# others hard-coded.
# 4. gateway exec into the gateway process
command = ["sh", "-c", <<-EOC
# Symlink /home/node/.ssh -> persistent .ssh so the ssh client
# finds id_rsa/config/known_hosts via $HOME/.ssh. HOME is
# /home/node (image overlay), .ssh files live on the PVC
# at /home/node/.openclaw/.ssh (set up by init 5).
ln -sfn /home/node/.openclaw/.ssh /home/node/.ssh
node openclaw.mjs doctor --fix 2>/dev/null
node openclaw.mjs models set openai-codex/gpt-5.4-mini 2>/dev/null
node openclaw.mjs mcp set ha "{\"url\":\"$HA_SOFIA_MCP_URL\",\"transport\":\"streamable-http\"}" 2>/dev/null
@ -522,6 +744,10 @@ resource "kubernetes_deployment" "openclaw" {
echo '{"plugins":{"allow":["memory-core","recruiter-api","telegram","openrouter","brave","openai","codex"]}}' \
| node openclaw.mjs config patch --stdin 2>/dev/null || true
node openclaw.mjs plugins enable recruiter-api 2>/dev/null || true
# Reindex memory-core so the seeded devvm-fallback note (and
# anything else dropped under /workspace/memory/) is searchable
# on first boot; daily memory-sync CronJob also keeps it indexed.
node openclaw.mjs memory index --force 2>/dev/null || true
exec node openclaw.mjs gateway --allow-unconfigured --bind lan
EOC
]
@ -544,8 +770,21 @@ resource "kubernetes_deployment" "openclaw" {
value = random_password.gateway_token.result
}
env {
name = "PATH"
value = "/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
name = "PATH"
# Host-tools bundle (installed by init 4: install-host-tools)
# comes first so ssh/scp/dig/vault/jq/etc. resolve to the
# extracted Debian binaries + the static-binary downloads.
# /bin + /sbin are needed because iputils-ping installs ping
# under /bin (not /usr/bin) on Debian.
value = "/tools/host-tools/root/usr/bin:/tools/host-tools/root/usr/sbin:/tools/host-tools/root/bin:/tools/host-tools/root/sbin:/tools/host-tools/bin:/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
}
env {
# Point ld.so at the bundled libs so the host-tools binaries
# find their shared-lib deps (libgssapi_krb5, libkrb5, etc.).
# Both base images are bookworm, so the libs match the
# openclaw image's libc/libssl; no ABI conflicts expected.
name = "LD_LIBRARY_PATH"
value = "/tools/host-tools/root/usr/lib/x86_64-linux-gnu:/tools/host-tools/root/lib/x86_64-linux-gnu"
}
env {
name = "TF_VAR_prod"