workstation: tmux sessions survive devvm reboots (save timer + boot restore)
Viktor: emo's open web-terminal sessions must persist across reboots. Claude conversations were already durable on disk; the volatile part was the tmux wiring (which named session runs which conversation). t3-tmux-sessions save (5-min timer) snapshots every roster user's sessions to /var/lib/t3-tmux-state/<user>.tsv — conversation uuid taken from argv --resume (self-sustaining once restored) or the newest transcript in the cwd-slug project dir created after process start (fresh launcher sessions; claude does NOT hold its transcript fd open, so fd-sniffing was a dead end). t3-tmux-sessions restore (boot oneshot, also safe after partial loss) recreates missing sessions with claude --resume <uuid>. Reconciler self-heals both units' enablement. Verified live: emo's 5 sessions snapshotted with correct uuids; killed R730-cooling -> restore brought it back resuming the same conversation (context meter identical); other sessions untouched. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
59a531b8e0
commit
2e4f48f3fc
6 changed files with 145 additions and 0 deletions
|
|
@ -256,6 +256,11 @@ done < <(jq -r '.ports | to_entries[] | [.key, .value] | @tsv' "$desired_file")
|
|||
# breaking nightly mid-day and took out auth for everyone. `enable` (no --now) just arms
|
||||
# the 04:00 schedule; fresh boxes get t3 from setup-devvm.sh's pinned install, not here.
|
||||
run systemctl enable t3-autoupdate.timer >/dev/null 2>&1 || true
|
||||
# tmux session persistence: periodic snapshot + boot-time restore (reboot
|
||||
# survival for users' named claude sessions). Safe to --now: save is a
|
||||
# read-only snapshot; restore is per-session idempotent.
|
||||
run systemctl enable --now t3-tmux-save.timer >/dev/null 2>&1 || true
|
||||
run systemctl enable t3-tmux-restore.service >/dev/null 2>&1 || true
|
||||
|
||||
# 6) regenerate /etc/ttyd-user-map + dispatch.json from the desired state (SSoT:
|
||||
# a roster entry removed here DISAPPEARS, which is what the offboarding cut relies on)
|
||||
|
|
|
|||
12
scripts/t3-tmux-restore.service
Normal file
12
scripts/t3-tmux-restore.service
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
[Unit]
Description=Restore workstation tmux sessions (resume saved claude conversations) after boot
After=network.target local-fs.target
# Hold back the snapshot service until restore has finished. The save timer's
# OnBootSec=10min alone does not guarantee this: its OnCalendar=*:0/5 trigger
# can elapse within minutes of boot, and a snapshot taken while restore is
# half-way through would overwrite the manifest with a partial session list.
Before=t3-tmux-save.service

[Service]
Type=oneshot
# The tmux servers started by restore stay in this unit's control group
# (unless the runuser PAM stack moves them into a login session scope). A
# oneshot unit without RemainAfterExit is stopped as soon as ExecStart
# returns, and the default KillMode=control-group would then terminate every
# process left in the group - i.e. the sessions that were just restored.
# Only signal the main process so the tmux servers outlive the unit.
KillMode=process
ExecStart=/usr/local/bin/t3-tmux-sessions restore

[Install]
WantedBy=multi-user.target
|
||||
6
scripts/t3-tmux-save.service
Normal file
6
scripts/t3-tmux-save.service
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
[Unit]
|
||||
Description=Snapshot workstation tmux sessions (name -> claude conversation) for reboot survival
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/t3-tmux-sessions save
|
||||
10
scripts/t3-tmux-save.timer
Normal file
10
scripts/t3-tmux-save.timer
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
[Unit]
Description=Periodic workstation tmux session snapshot

[Timer]
# Monotonic schedule: first snapshot 10 min after boot, then every 5 min
# counted from the previous activation of t3-tmux-save.service.
# A wall-clock OnCalendar=*:0/5 trigger would additionally elapse at the next
# 5-minute mark after boot, defeating the OnBootSec delay that keeps the first
# snapshot clear of the boot-time restore.
# If the timer is started when more than 10 min have already passed since
# boot, OnBootSec elapses immediately and the 5-minute cadence starts there.
OnBootSec=10min
OnUnitActiveSec=5min

[Install]
WantedBy=timers.target
|
||||
110
scripts/t3-tmux-sessions.sh
Normal file
110
scripts/t3-tmux-sessions.sh
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
#!/usr/bin/env bash
|
||||
# Persist workstation tmux sessions across devvm reboots.
|
||||
#
|
||||
# save — snapshot every roster user's live tmux sessions to
|
||||
# /var/lib/t3-tmux-state/<user>.tsv (name, cwd, claude session
|
||||
# uuid). The uuid is taken from the claude process's argv
# (`--resume <uuid>`) or, for sessions started fresh via
# start-claude.sh, from the newest transcript
# (~/.claude/projects/<slug>/<uuid>.jsonl) written since the process
# started. claude does not keep its transcript fd open, so the uuid
# cannot be sniffed from /proc/<pid>/fd. Runs every 5 min via
|
||||
# t3-tmux-save.timer. A user with no tmux server keeps their last
|
||||
# manifest (so a post-reboot save can't wipe it before restore).
|
||||
# restore — recreate manifest sessions that don't currently exist, resuming
|
||||
# each saved conversation (claude --resume <uuid>). Per-session
|
||||
# idempotent: existing names are left alone, so it is safe both at
|
||||
# boot (t3-tmux-restore.service) and after a partial loss.
|
||||
#
|
||||
# v1 limitation: one window/pane per session is captured (the workstation
|
||||
# usage pattern — one named claude conversation per tmux session).
|
||||
# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Directory holding one <user>.tsv manifest per roster user.
STATE_DIR='/var/lib/t3-tmux-state'
# Roster map consumed by users(); the user name is the second `=`-separated field.
MAP='/etc/ttyd-user-map'
# Sub-command from the command line; empty when none was given.
MODE=${1-}
|
||||
|
||||
# Print all arguments as one line on stdout, prefixed with the script tag.
log() {
  printf '%s\n' "[t3-tmux-sessions] $*"
}
|
||||
|
||||
# List the distinct values of the second `=`-separated field of $MAP, sorted,
# one per line. Returns non-zero without output when $MAP is not readable.
users() {
  if [[ ! -r "$MAP" ]]; then
    return 1
  fi
  cut -d= -f2 "$MAP" | sort -u
}
|
||||
|
||||
# Run tmux as another user.
#   $1    - account to run as
#   $2... - arguments passed to tmux unchanged
# Exit status is that of runuser.
tmux_as() {
  local account="$1"
  shift
  runuser -u "$account" -- tmux "$@"
}
|
||||
|
||||
# Breadth-first search of the process tree rooted at pid $1 (the root itself
# included) for the first process whose comm is exactly `claude`.
# Prints that pid and returns 0; returns 1 with no output when none is found.
claude_pid_under() {
  local -a pending=("$1") children=()
  local current name
  while ((${#pending[@]} > 0)); do
    # Pop the head of the queue.
    current="${pending[0]}"
    pending=("${pending[@]:1}")

    name="$(ps -o comm= -p "$current" 2>/dev/null)" || true
    if [[ "$name" == claude ]]; then
      echo "$current"
      return 0
    fi

    # Enqueue direct children; pgrep prints nothing (and fails) when there are none.
    children=()
    mapfile -t children < <(pgrep -P "$current" 2>/dev/null)
    if ((${#children[@]} > 0)); then
      pending+=("${children[@]}")
    fi
  done
  return 1
}
|
||||
|
||||
# Conversation uuid of a claude process.
#   $1 - pid of the claude process
#   $2 - user owning it (used to locate the home directory)
#   $3 - working directory of the pane (used to derive the project slug)
# claude does NOT hold its transcript fd open, so fd-sniffing doesn't work;
# two sources are tried instead:
#   1. argv `--resume <uuid>`, `--resume=<uuid>` or `-r <uuid>` — covers every
#      session this script's restore (or a manual recovery) created, making the
#      save/restore loop self-sustaining. If the option appears more than once
#      the last well-formed value wins.
#   2. the most recently modified <uuid>.jsonl in the user's cwd-slug project
#      dir whose mtime is at/after the process start (minus 5 s of slack) —
#      covers fresh launcher-started sessions. Only uuid-shaped file names are
#      considered, so other *.jsonl files in that directory are never returned.
# Prints the uuid on stdout. Always returns 0; empty output means "no
# conversation" (restored as a plain shell).
uuid_of_claude() {
  local uuid_re='[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'
  local -a argv=()
  local arg cand uuid="" i home slug dir age start f

  # Split the NUL-separated argv. stderr is redirected first so a process that
  # vanished in the meantime does not leak an "No such file" message.
  while IFS= read -r -d '' arg; do
    argv+=("$arg")
  done 2>/dev/null < "/proc/$1/cmdline" || true

  for ((i = 0; i < ${#argv[@]}; i++)); do
    cand=""
    case "${argv[i]}" in
      --resume | -r) cand="${argv[i + 1]:-}" ;;
      --resume=*) cand="${argv[i]#--resume=}" ;;
    esac
    if [[ "$cand" =~ ^${uuid_re}$ ]]; then
      uuid="$cand"
    fi
  done
  if [[ -n "$uuid" ]]; then
    echo "$uuid"
    return 0
  fi

  # Fallback: look in the project dir named after the cwd ('/' and '.' -> '-').
  slug="${3//\//-}"
  slug="${slug//./-}"
  home="$(getent passwd "$2" | cut -d: -f6 || true)"
  dir="$home/.claude/projects/$slug"
  [[ -d "$dir" ]] || return 0

  # Without a known process age there is no meaningful cutoff; guessing one
  # would risk attributing an unrelated, recently touched transcript.
  age="$(ps -o etimes= -p "$1" 2>/dev/null | tr -d ' ' || true)"
  [[ "$age" =~ ^[0-9]+$ ]] || return 0
  start=$(($(date +%s) - age - 5))

  # `|| true` inside the substitution keeps the captured output even when
  # `head` closes the pipe early under pipefail.
  f="$(find "$dir" -maxdepth 1 -type f -name '*.jsonl' -newermt "@$start" \
    -printf '%T@ %f\n' 2>/dev/null \
    | grep -E " ${uuid_re}\.jsonl\$" | sort -rn | head -n 1 | awk '{print $2}' || true)"
  if [[ -n "$f" ]]; then
    echo "${f%.jsonl}"
  fi
  return 0
}
|
||||
|
||||
save() {
|
||||
install -d -m 0755 "$STATE_DIR"
|
||||
local u uid sess pane_pid pane_cwd cpid uuid tmp
|
||||
for u in $(users); do
|
||||
uid="$(id -u "$u" 2>/dev/null)" || continue
|
||||
[[ -S "/tmp/tmux-$uid/default" ]] || continue # no server -> keep last manifest
|
||||
tmp="$(mktemp)"
|
||||
while IFS=$'\t' read -r sess pane_pid pane_cwd; do
|
||||
[[ -n "$sess" ]] || continue
|
||||
uuid=""
|
||||
if cpid="$(claude_pid_under "$pane_pid")"; then uuid="$(uuid_of_claude "$cpid" "$u" "$pane_cwd")"; fi
|
||||
printf '%s\t%s\t%s\n' "$sess" "$pane_cwd" "$uuid" >> "$tmp"
|
||||
done < <(tmux_as "$u" list-panes -a -F $'#{session_name}\t#{pane_pid}\t#{pane_current_path}' 2>/dev/null \
|
||||
| sort -u -t$'\t' -k1,1)
|
||||
install -m 0600 "$tmp" "$STATE_DIR/$u.tsv"; rm -f "$tmp"
|
||||
log "saved $(wc -l < "$STATE_DIR/$u.tsv") session(s) for $u"
|
||||
done
|
||||
}
|
||||
|
||||
# Recreate every manifest session that is not currently live.
# For each roster user, reads $STATE_DIR/<user>.tsv
# ("<session>\t<cwd>\t<uuid>") and, for each session name tmux does not
# already have, starts a detached session in the saved cwd (falling back to
# the user's home when that directory is gone). With a saved conversation uuid
# the session runs `claude --resume <uuid>` and drops to a login shell when
# claude exits; without one it is a plain login shell. Existing sessions are
# never touched, so running restore repeatedly is harmless.
#
# The command is handed to tmux as a shell string, so everything interpolated
# into it is made safe first: the uuid must be uuid-shaped (otherwise the
# session is restored as a plain shell) and the session name is single-quoted
# with embedded quotes escaped, so names containing ", $, ` or ' are passed
# to claude verbatim.
restore() {
  local uuid_re='^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'
  local sq="'"
  local u f sess cwd uuid cmd home rest name_q

  for u in $(users); do
    f="$STATE_DIR/$u.tsv"
    [[ -s "$f" ]] || continue
    home="$(getent passwd "$u" | cut -d: -f6)" || home=""

    # The manifest is read on fd 3 and children get /dev/null as stdin, so no
    # child process can consume the remaining manifest lines.
    while IFS=$'\t' read -r -u 3 sess cwd uuid; do
      [[ -n "$sess" ]] || continue
      if tmux_as "$u" has-session -t "=$sess" </dev/null 2>/dev/null; then
        continue # already live
      fi
      [[ -d "$cwd" ]] || cwd="$home"

      if [[ -n "$uuid" && ! "$uuid" =~ $uuid_re ]]; then
        log "WARN: ignoring malformed conversation id for $u:$sess"
        uuid=""
      fi

      if [[ -n "$uuid" ]]; then
        # POSIX single-quoting: close the quote, emit \', reopen ( ' -> '\'' ).
        rest="$sess"
        name_q=""
        while [[ "$rest" == *"$sq"* ]]; do
          name_q+="${rest%%"$sq"*}${sq}\\${sq}${sq}"
          rest="${rest#*"$sq"}"
        done
        name_q="${sq}${name_q}${rest}${sq}"
        cmd="claude --dangerously-skip-permissions --resume $uuid --name $name_q; echo; echo ' claude exited — shell preserved'; exec bash -l"
      else
        cmd="exec bash -l"
      fi

      if tmux_as "$u" new-session -d -s "$sess" -c "$cwd" "$cmd" </dev/null; then
        log "restored $u:$sess${uuid:+ (resume ${uuid:0:8})}"
      else
        log "WARN: failed to restore $u:$sess"
      fi
    done 3< "$f"
  done
}
|
||||
|
||||
# Entry point: run the requested sub-command, or print usage to stderr and
# exit 1 for anything else (including no argument).
if [[ "$MODE" == save ]]; then
  save
elif [[ "$MODE" == restore ]]; then
  restore
else
  echo "usage: t3-tmux-sessions save|restore" >&2
  exit 1
fi
|
||||
Loading…
Add table
Add a link
Reference in a new issue