From 0b11a28d66948cb7c11ac689bea3c5bf88cd581e Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 22 Jun 2026 07:59:47 +0000 Subject: [PATCH 1/2] workstation: stop install_memory aborting the reconcile under set -e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit install_memory (added in 44562535) ended with `[[ -d ]] && rm && log` and guarded a chmod with a bare `[[ -f settings ]] && chmod`. When the plugin dir or settings file is absent — the normal case for users who never had the claude-memory plugin — those return non-zero, and under `set -euo pipefail` the function returns non-zero and kills the whole hourly reconcile after the FIRST user, before the rest are processed. It never fired before because the rollout was committed but the deployed /usr/local/bin/t3-provision-users was never updated, so install_memory had never run. On first real run it aborted right after ancamilea, so emo (and wizard) never got their memory hooks wired — the reason emo's sessions lost memory. Wrap the cleanup in an if-block, guard the chmod, and end the function with return 0. Co-Authored-By: Claude Opus 4.8 --- scripts/t3-provision-users.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/t3-provision-users.sh b/scripts/t3-provision-users.sh index 6164235d..fc54faca 100644 --- a/scripts/t3-provision-users.sh +++ b/scripts/t3-provision-users.sh @@ -414,7 +414,7 @@ install_memory() { else log "WARN: memory hook wiring failed for $user (retries next reconcile)" fi - [[ -f "$settings" ]] && chmod 600 "$settings" + [[ -f "$settings" ]] && chmod 600 "$settings" || true # (2b) reuse the user's existing key; warn (do NOT mint — needs an admin vault write) if absent. if [[ -f "$settings" ]] && ! 
grep -q 'MEMORY_API_KEY' "$settings"; then @@ -425,7 +425,10 @@ install_memory() { if runuser -u "$user" -- bash -lc 'command -v claude >/dev/null 2>&1 && claude mcp get claude_memory >/dev/null 2>&1'; then runuser -u "$user" -- bash -lc 'claude mcp remove claude_memory >/dev/null 2>&1' && log "removed claude_memory MCP -> $user" || true fi - [[ -d "$home/.claude/plugins/claude-memory" ]] && rm -rf "$home/.claude/plugins/claude-memory" && log "removed claude-memory plugin dir -> $user" + if [[ -d "$home/.claude/plugins/claude-memory" ]]; then + rm -rf "$home/.claude/plugins/claude-memory" && log "removed claude-memory plugin dir -> $user" + fi + return 0 # best-effort tail must never return non-zero, else set -euo pipefail aborts the whole reconcile } [[ $EUID -eq 0 ]] || { echo "t3-provision-users: must run as root" >&2; exit 1; } From 92f35550f2c051af31f8da7a1db550a50c4c75a8 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 22 Jun 2026 08:02:31 +0000 Subject: [PATCH 2/2] workstation: self-deploy t3-provision-users from the repo each reconcile [ci skip] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of emo's lost memory: nothing redeployed /usr/local/bin/t3-provision-users except the manual setup-devvm.sh, so the homelab-memory rollout (44562535/9aa2438e, Jun 21) sat committed-but-undeployed for a day — the hourly reconcile kept running the pre-memory binary and never wired the new memory hooks for emo/anca. Close the gap the same way the script already treats managed-settings.json and start-claude.sh (sync_managed_config / deploy_user_launcher): the repo is the authoring surface. At the top of the run, if the repo copy differs from the deployed binary, install it and re-exec the fresh one. Guards: a re-exec env flag (no loop), bash -n (never deploy a broken script), DRY_RUN (no mutation), cmp (no churn when unchanged). Verified across all four paths in isolation. 
Co-Authored-By: Claude Opus 4.8
---
 scripts/t3-provision-users.sh | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/scripts/t3-provision-users.sh b/scripts/t3-provision-users.sh
index fc54faca..d95e037f 100644
--- a/scripts/t3-provision-users.sh
+++ b/scripts/t3-provision-users.sh
@@ -434,6 +434,33 @@ install_memory() {
 [[ $EUID -eq 0 ]] || { echo "t3-provision-users: must run as root" >&2; exit 1; }
 for bin in python3 jq; do command -v "$bin" >/dev/null || { echo "missing $bin" >&2; exit 1; }; done
 [[ -f "$ROSTER" && -f "$ENGINE" ]] || { echo "roster/engine not under $WORKSTATION_DIR" >&2; exit 1; }
+
+# 0) self-deploy: the repo is the authoring surface (like sync_managed_config /
+#    deploy_user_launcher below). Nothing else redeploys /usr/local/bin (only the
+#    manual setup-devvm.sh did) — so a committed edit silently never reached the
+#    hourly run until now (the homelab-memory rollout sat undeployed for a day).
+#    If the repo copy differs, install it and re-exec the fresh binary. Guarded:
+#    re-exec flag (no loop), bash -n (never deploy a broken script), DRY_RUN (no
+#    mutation), cmp (no churn when unchanged). A failed install is logged and the
+#    run continues with the running copy — under set -e an unguarded install
+#    failure would abort the whole reconcile.
+SELF_SRC="$WORKSTATION_DIR/../t3-provision-users.sh"
+SELF_DST=/usr/local/bin/t3-provision-users
+if [[ -z "${T3_PROVISION_SELF_DEPLOYED:-}" && -r "$SELF_SRC" ]] && ! cmp -s "$SELF_SRC" "$SELF_DST"; then
+  if [[ "$DRY_RUN" == 1 ]]; then
+    echo "[dry-run] self-deploy $SELF_DST from repo (changed)"
+  elif bash -n "$SELF_SRC" 2>/dev/null; then
+    if install -m 0755 "$SELF_SRC" "$SELF_DST"; then
+      log "self-deployed $SELF_DST from repo (changed) — re-exec"
+      exec env T3_PROVISION_SELF_DEPLOYED=1 "$SELF_DST" "$@"
+    else
+      log "WARN: self-deploy to $SELF_DST failed — continuing with the running copy"
+    fi
+  else
+    log "WARN: repo t3-provision-users.sh fails 'bash -n' — keeping deployed copy"
+  fi
+fi
+
 install -d -m 0755 "$ENVDIR"

 # 1) current sticky ports from existing .env files -> {os_user: port}