From 06e400522f3a11538524675a02dcc17f026eb3b1 Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sun, 21 Jun 2026 12:34:11 +0000
Subject: [PATCH] t3-migrate-idle: idle gate (no in-flight turn + quiet buffer), TDD

The gate reads t3's state.sqlite: safe to restart only when zero threads have an
active_turn_id AND the most-recent thread activity is older than the quiet buffer
(default 15m). Fail-closed on any parse/query error. Pure-bash unit tests cover the
boundaries against fixture DBs (no root/bats/Docker).

Co-Authored-By: Claude Opus 4.8
---
 scripts/t3-migrate-idle.sh         | 56 ++++++++++++++++++++++++++++++
 tests/t3-migrate-idle-gate.test.sh | 44 +++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 scripts/t3-migrate-idle.sh
 create mode 100644 tests/t3-migrate-idle-gate.test.sh

diff --git a/scripts/t3-migrate-idle.sh b/scripts/t3-migrate-idle.sh
new file mode 100644
index 00000000..f17431e8
--- /dev/null
+++ b/scripts/t3-migrate-idle.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# t3-migrate-idle.sh — drains t3-autoupdate's deferral markers (via the overnight
+# t3-migrate-idle.timer). For each deferred t3-serve@ instance, if nothing is actively
+# working in that instance (no in-flight turn + a quiet buffer), restart it onto the
+# current binary using the shared safe_restart_unit, then clear the marker.
+# Why this exists: t3-autoupdate defers a user with an active agent at its single
+# daily window; a user busy every night never migrates and their client shows
+# "Client and server versions differ". See docs/plans/2026-06-21-t3-idle-migrate-*.
+set -uo pipefail
+
+LOG_TAG=t3-migrate-idle  # set before sourcing the lib below; presumably the lib reads it — confirm
+# shellcheck source=scripts/t3-safe-restart.sh
+. "${T3_SAFE_RESTART_LIB:-/usr/local/lib/t3-safe-restart.sh}"
+
+QUIET_SECONDS="${T3_MIGRATE_QUIET_SECONDS:-900}"  # required idle before a restart (15 min)
+DRY_RUN="${T3_DRY_RUN:-0}"  # from T3_DRY_RUN (default 0); not referenced elsewhere in this patch
+
+# pure logic: is it safe given ACTIVE (in-flight turn count) and IDLE (seconds since last activity)? fail closed.
+gate_is_safe() {
+  local active="$1" idle="$2"
+  case "$active" in ''|*[!0-9]*) return 1;; esac    # unparseable/empty active -> unsafe
+  [ "$active" -eq 0 ] || return 1                   # a turn is running -> unsafe
+  [ -z "$idle" ] && return 0                        # no threads at all -> safe
+  case "${idle#-}" in ''|*[!0-9]*) return 1;; esac  # not [-]digits ('unparseable', '1-2', '-') -> unsafe
+  [ "$idle" -ge "$QUIET_SECONDS" ]                  # negative or < quiet -> unsafe
+}
+
+# GATE_SQL: the single query used by both gate_query and safe_to_restart, so the SQL the
+# fixture tests exercise is the SQL run against real DBs. Yields "ACTIVE|IDLE_SECONDS".
+# IDLE_SECONDS is empty ONLY for an empty table; an unparseable newest updated_at yields 'unparseable'.
+GATE_SQL="SELECT
+  (SELECT count(*) FROM projection_thread_sessions WHERE active_turn_id IS NOT NULL),
+  CASE WHEN count(*) = 0 THEN NULL
+    ELSE COALESCE(CAST((julianday('now') - julianday(replace(replace(max(updated_at),'T',' '),'Z',''))) * 86400 AS INT), 'unparseable') END
+FROM projection_thread_sessions;"
+
+# gate_query DB: run GATE_SQL against a state.sqlite (path or file: URI) and echo its one row.
+gate_query() {
+  sqlite3 -batch -noheader -separator '|' "$1" "$GATE_SQL"
+}
+
+# safe_to_restart USER: run GATE_SQL as USER (runuser) on /home/USER/.t3/userdata/state.sqlite,
+# opened read-only (mode=ro), then apply gate_is_safe. Missing DB or failed query -> 1 (fail closed).
+safe_to_restart() {
+  local u="$1" db row
+  db="/home/$u/.t3/userdata/state.sqlite"; [ -f "$db" ] || return 1
+  row="$(runuser -u "$u" -- sqlite3 -batch -noheader -separator '|' "file:$db?mode=ro" "$GATE_SQL" 2>/dev/null)" || return 1
+  gate_is_safe "${row%%|*}" "${row##*|}"
+}
+
+main() {
+  : # drain loop added in Task 4
+}
+
+# main-guard: run only when executed, not when sourced (tests source this file).
+if [ "${BASH_SOURCE[0]}" = "${0}" ]; then main "$@"; fi
diff --git a/tests/t3-migrate-idle-gate.test.sh b/tests/t3-migrate-idle-gate.test.sh
new file mode 100644
index 00000000..a36f95cb
--- /dev/null
+++ b/tests/t3-migrate-idle-gate.test.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Pure-bash unit tests for the t3-migrate-idle gate. No root, no bats, no Docker.
+# Sources t3-migrate-idle.sh (main-guarded) with the lib path pointed at the worktree.
+set -uo pipefail
+HERE="$(cd "$(dirname "$0")/.." && pwd)"   # repo root (tests/ is one level down)
+export T3_SAFE_RESTART_LIB="$HERE/scripts/t3-safe-restart.sh"
+# shellcheck source=/dev/null
+. "$HERE/scripts/t3-migrate-idle.sh"       # defines functions; main-guard prevents the drain from running
+
+pass=0; fail=0
+ok()   { if "$@"; then pass=$((pass+1)); else fail=$((fail+1)); echo "FAIL: $*"; fi; }
+notok(){ if "$@"; then fail=$((fail+1)); echo "FAIL (expected non-zero): $*"; else pass=$((pass+1)); fi; }
+
+# --- gate_is_safe with QUIET_SECONDS=900 ---
+QUIET_SECONDS=900
+ok gate_is_safe 0 1000; ok gate_is_safe 0 900        # quiet long enough (incl. boundary idle == quiet) -> safe
+notok gate_is_safe 1 1000                            # a turn in flight -> unsafe
+notok gate_is_safe 0 100; notok gate_is_safe 0 899   # not quiet enough (incl. boundary quiet-1) -> unsafe
+ok gate_is_safe 0 ""                                 # no threads at all (NULL idle) -> safe
+notok gate_is_safe x 1000                            # unparseable active -> unsafe
+notok gate_is_safe 0 -30                             # negative idle (clock skew) -> unsafe
+
+# --- gate_query against fixture SQLite DBs ---
+TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT
+mkfix() { # mkfix FILE; reads rows "active_turn_id|updated_at" on stdin (literal NULL = SQL NULL)
+  local f="$1" a u; sqlite3 "$f" "CREATE TABLE projection_thread_sessions(active_turn_id TEXT, updated_at TEXT NOT NULL);"
+  while IFS='|' read -r a u; do sqlite3 "$f" "INSERT INTO projection_thread_sessions VALUES ($([ "$a" = NULL ] && echo NULL || echo "'$a'"), '$u');"; done
+}
+NOW="$(date -u +%Y-%m-%dT%H:%M:%S.000Z)"
+OLD="$(date -u -d '1 hour ago' +%Y-%m-%dT%H:%M:%S.000Z)"   # -d is GNU date syntax
+
+# active turn present -> active count is 1
+printf '%s\n' "abc|$NOW" "NULL|$OLD" | mkfix "$TMP/active.db"
+res="$(gate_query "$TMP/active.db")"; ok test "${res%%|*}" = "1"
+
+# all idle, last activity 1h ago -> "0|>=3500"
+printf '%s\n' "NULL|$OLD" "NULL|$OLD" | mkfix "$TMP/idle.db"
+res="$(gate_query "$TMP/idle.db")"; ok test "${res%%|*}" = "0"; ok test "${res##*|}" -ge 3500
+
+# empty table -> "0|" (NULL idle): assert both the zero count and the empty idle field
+sqlite3 "$TMP/empty.db" "CREATE TABLE projection_thread_sessions(active_turn_id TEXT, updated_at TEXT NOT NULL);"
+res="$(gate_query "$TMP/empty.db")"; ok test "${res%%|*}" = "0"; ok test -z "${res##*|}"
+
+echo "PASS=$pass FAIL=$fail"; [ "$fail" -eq 0 ]