t3: prepare to adopt 0.0.25 — version-agnostic dispatch + real pairing health-check + state backup [ci skip]

Investigated the 0.0.25 break: it is ONLY an endpoint rename
(/api/auth/bootstrap -> /api/auth/browser-session). The rest of the pairing
contract (credential payload, t3_session cookie, /api/auth/session) is
byte-identical, verified in isolated 0.0.24-vs-0.0.25 sandbox serves. So a
future pin bump is now safe + reversible (pin STAYS 0.0.24 — this is prep):

- t3-dispatch: autoPair tries /api/auth/browser-session, falls back to
  /api/auth/bootstrap on 404 — one binary pairs across both versions and any
  rolling-restart skew. TDD via TestAutoPairAcrossVersions (red on 0.0.25
  before, green after). Built, deployed, verified live on 0.0.24 (all three
  users still 302 + t3_session via the fallback).
- t3-autoupdate.sh: health-check now exercises the REAL mint->credential->cookie
  handshake (was GET / -> 200, which passed the pairing-broken nightly). A bad
  build now auto-rolls-back. Validated against both versions.
- t3-backup-state.{sh,service,timer}: daily online VACUUM INTO of each ~/.t3
  state.sqlite (previously the only copy, with no backup) -> undoing the one-way
  forward schema migration becomes a restore, not sqlite surgery. Timeout-guarded.
- runbooks/t3-version-bump.md: the reversible cutover checklist.
- post-mortem #5 (health-check) DONE + #6 added; service-catalog updated.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-09 20:00:11 +00:00
parent 5ea238c707
commit bccaa08d8e
9 changed files with 311 additions and 19 deletions

View file

@ -113,9 +113,42 @@ func isDocumentNav(r *http.Request) bool {
return strings.Contains(r.Header.Get("Accept"), "text/html")
}
// pairEndpoints lists the paths on a t3 instance that accept the pairing
// credential, in the order they should be tried. t3 0.0.25 renamed
// /api/auth/bootstrap to /api/auth/browser-session, so the new name comes first
// and the old one is kept as a fallback. That lets a single dispatch binary
// pair against either version, including during a rolling restart where only
// some instances have been upgraded, instead of answering with a burst of 502s.
var pairEndpoints = []string{
	"/api/auth/browser-session", // name from t3 0.0.25 onward
	"/api/auth/bootstrap",       // name before the 0.0.25 rename
}
// exchangeCredential sends the pairing credential, as the JSON object
// {"credential": "..."}, to the instance listening on 127.0.0.1:port. It walks
// pairEndpoints in order:
//
//   - a transport error is remembered and the next endpoint is tried;
//   - a 404 is read as "this t3 version does not have that endpoint", so the
//     response body is closed and the next endpoint is tried;
//   - any other status is treated as the instance's answer and the response is
//     returned untouched.
//
// On success the caller is responsible for closing resp.Body. When no endpoint
// produced an answer, the most recent transport error is returned if there was
// one; otherwise an error stating that every endpoint returned 404.
//
// NOTE(review): http.Post goes through http.DefaultClient, which sets no
// timeout, so a hung instance would block the caller — consider a client with
// an explicit Timeout.
func exchangeCredential(port int, credential string) (*http.Response, error) {
	// Encoding a map[string]string cannot fail, hence the discarded error.
	payload, _ := json.Marshal(map[string]string{"credential": credential})
	base := fmt.Sprintf("http://127.0.0.1:%d", port)

	var transportErr error
	for i := range pairEndpoints {
		resp, err := http.Post(base+pairEndpoints[i], "application/json", bytes.NewReader(payload))
		switch {
		case err != nil:
			transportErr = err
		case resp.StatusCode == http.StatusNotFound:
			// Endpoint absent in this t3 version; release it and move on.
			resp.Body.Close()
		default:
			return resp, nil
		}
	}

	if transportErr == nil {
		return nil, fmt.Errorf("no pairing endpoint accepted the request (all returned 404)")
	}
	return nil, transportErr
}
// autoPair mints a one-time pairing token for the user's instance (as that OS
// user, via the scoped sudoers entry) and exchanges it at the instance's
// /api/auth/bootstrap, relaying the returned t3_session Set-Cookie to the browser.
// user, via the scoped sudoers entry) and exchanges it at the instance's pairing
// endpoint, relaying the returned t3_session Set-Cookie to the browser.
func autoPair(e entry, w http.ResponseWriter, r *http.Request) {
// t3-mint (root, via scoped sudoers) validates the OS user is in
// /etc/ttyd-user-map, then mints as that user. The dispatch service itself
@ -133,16 +166,15 @@ func autoPair(e entry, w http.ResponseWriter, r *http.Request) {
http.Error(w, "unparseable pairing output", http.StatusInternalServerError)
return
}
body, _ := json.Marshal(map[string]string{"credential": pc.Credential})
resp, err := http.Post(fmt.Sprintf("http://127.0.0.1:%d/api/auth/bootstrap", e.Port),
"application/json", bytes.NewReader(body))
resp, err := exchangeCredential(e.Port, pc.Credential)
if err != nil {
log.Printf("pairing exchange for %s failed: %v", e.OsUser, err)
http.Error(w, "bootstrap request failed", http.StatusBadGateway)
return
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
log.Printf("bootstrap for %s returned %d", e.OsUser, resp.StatusCode)
log.Printf("pairing for %s returned %d", e.OsUser, resp.StatusCode)
http.Error(w, "bootstrap rejected", http.StatusBadGateway)
return
}