From ac6f19dd3b7681a86be626d935bc29c640dba281 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Wed, 10 Jun 2026 20:38:59 +0000 Subject: [PATCH] tmux-persist: never let an empty snapshot clobber a saved manifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emo's 5 web-terminal tmux sessions were OOM-killed (the server died, no reboot), and the 5-minute save tick then overwrote his session manifest with 0 bytes — wiping the record that restore needs. Root cause: the save guard only checked that the tmux socket *file* existed, but an OOM-killed server leaves a stale /tmp/tmux-<uid>/default behind; list-panes then returns nothing and that empty capture was installed over the good manifest. Because the restore service only runs at boot, an OOM (not a reboot) skips restore entirely, so the clobbered manifest was the only record left — and it was already gone. Fix: only overwrite <user>.tsv when the snapshot captured >=1 live session; otherwise keep the last good manifest (now covers no-server AND stale-socket/dead-server). Verified by reproducing the 0-byte clobber on the old script and confirming the new one preserves the manifest, plus a live save that still captures every active session. emo's 5 sessions were recovered from their transcripts and are back; this keeps the next OOM from destroying the manifest again. Co-Authored-By: Claude Opus 4.8 --- scripts/tmux-persist.sh | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/scripts/tmux-persist.sh b/scripts/tmux-persist.sh index 26484587..3ab18030 100644 --- a/scripts/tmux-persist.sh +++ b/scripts/tmux-persist.sh @@ -12,8 +12,9 @@ # transcript fd (~/.claude/projects//.jsonl), so it is # correct regardless of how the session was launched (fresh via # start-claude.sh or an explicit --resume). Runs every 5 min via -# tmux-persist-save.timer. 
A user with no tmux server keeps their last -# manifest (so a post-reboot save can't wipe it before restore). +# tmux-persist-save.timer. A snapshot that captures no live sessions +# (no server, OR a stale socket left behind by an OOM-killed server) +# keeps the user's last manifest, so it can't be wiped before restore. # restore — recreate manifest sessions that don't currently exist, resuming # each saved conversation (claude --resume <uuid>). Per-session # idempotent: existing names are left alone, so it is safe both at @@ -70,10 +71,10 @@ uuid_of_claude() { save() { install -d -m 0755 "$STATE_DIR" - local u uid sess pane_pid pane_cwd cpid uuid tmp + local u uid sess pane_pid pane_cwd cpid uuid tmp n for u in $(users); do uid="$(id -u "$u" 2>/dev/null)" || continue - [[ -S "/tmp/tmux-$uid/default" ]] || continue # no server -> keep last manifest + [[ -S "/tmp/tmux-$uid/default" ]] || continue # no socket at all -> keep last manifest tmp="$(mktemp)" while IFS=$'\t' read -r sess pane_pid pane_cwd; do [[ -n "$sess" ]] || continue @@ -82,8 +83,19 @@ save() { printf '%s\t%s\t%s\n' "$sess" "$pane_cwd" "$uuid" >> "$tmp" done < <(tmux_as "$u" list-panes -a -F $'#{session_name}\t#{pane_pid}\t#{pane_current_path}' 2>/dev/null \ | sort -u -t$'\t' -k1,1) - install -m 0600 "$tmp" "$STATE_DIR/$u.tsv"; rm -f "$tmp" - log "saved $(wc -l < "$STATE_DIR/$u.tsv") session(s) for $u" + # Only overwrite the manifest when we captured >=1 live session. A socket + # file can outlive its server (an OOM-killed tmux server leaves + # /tmp/tmux-<uid>/default behind); list-panes then yields nothing, and + # installing that empty result would clobber a good manifest right before + # restore needs it. Empty capture -> keep the last good manifest. + n=$(wc -l < "$tmp") + if (( n > 0 )); then + install -m 0600 "$tmp" "$STATE_DIR/$u.tsv" + log "saved $n session(s) for $u" + else + log "no live sessions for $u (stale socket or dead server) — keeping last manifest" + fi + rm -f "$tmp" done }