t3: prepare to adopt 0.0.25 — version-agnostic dispatch + real pairing health-check + state backup [ci skip]

Investigated the 0.0.25 break: it is ONLY an endpoint rename
(/api/auth/bootstrap -> /api/auth/browser-session). The rest of the pairing
contract (credential payload, t3_session cookie, /api/auth/session) is
byte-identical, verified in isolated 0.0.24-vs-0.0.25 sandbox serves. So a
future pin bump is now safe + reversible (pin STAYS 0.0.24 — this is prep):

- t3-dispatch: autoPair tries /api/auth/browser-session, falls back to
  /api/auth/bootstrap on 404 — one binary pairs across both versions and any
  rolling-restart skew. TDD via TestAutoPairAcrossVersions (red on 0.0.25
  before, green after). Built, deployed, verified live on 0.0.24 (all three
  users still 302 + t3_session via the fallback).
- t3-autoupdate.sh: health-check now exercises the REAL mint->credential->cookie
  handshake (was GET / -> 200, which passed the pairing-broken nightly). A bad
  build now auto-rolls-back. Validated against both versions.
- t3-backup-state.{sh,service,timer}: daily online VACUUM INTO of each ~/.t3
  state.sqlite (previously the only copy, with no backup) -> rolling back the
  one-way forward schema migration becomes a restore from backup, not sqlite
  surgery. timeout-guarded.
- runbooks/t3-version-bump.md: the reversible cutover checklist.
- post-mortem #5 (health-check) DONE + #6 added; service-catalog updated.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-09 20:00:11 +00:00
parent 5ea238c707
commit bccaa08d8e
9 changed files with 311 additions and 19 deletions

View file

@ -113,9 +113,42 @@ func isDocumentNav(r *http.Request) bool {
return strings.Contains(r.Header.Get("Accept"), "text/html")
}
// pairEndpoints lists the paths at which a t3 instance may expose its
// session-bootstrap (pairing) endpoint, most-preferred first. t3 0.0.25 renamed
// /api/auth/bootstrap to /api/auth/browser-session; probing the new name and
// then the old one lets a single dispatch binary pair with either version,
// including during a rolling restart where instances run mixed versions.
var pairEndpoints = []string{
	"/api/auth/browser-session", // name as of t3 0.0.25
	"/api/auth/bootstrap",       // name before the 0.0.25 rename
}

// exchangeCredential POSTs {"credential": credential} as JSON to the instance
// listening on 127.0.0.1:port, walking pairEndpoints in order.
//
// Per endpoint:
//   - transport error: remembered, and the next endpoint is tried;
//   - 404: the endpoint does not exist in this t3 version, so the response body
//     is closed and the next endpoint is tried;
//   - any other status: that response is returned untouched and the caller
//     becomes responsible for closing resp.Body.
//
// When no endpoint produced a usable response, the most recent transport error
// is returned if there was one; otherwise every endpoint answered 404 and a
// descriptive error is returned. The response is nil whenever the error is not.
//
// NOTE(review): http.Post goes through http.DefaultClient, which sets no
// timeout, so an instance that accepts the connection but never answers will
// block the caller indefinitely.
func exchangeCredential(port int, credential string) (*http.Response, error) {
	// Marshalling a map[string]string cannot fail, so the error is dropped.
	payload, _ := json.Marshal(map[string]string{"credential": credential})
	base := fmt.Sprintf("http://127.0.0.1:%d", port)

	var transportErr error
	for i := 0; i < len(pairEndpoints); i++ {
		resp, err := http.Post(base+pairEndpoints[i], "application/json", bytes.NewReader(payload))
		switch {
		case err != nil:
			transportErr = err
		case resp.StatusCode == http.StatusNotFound:
			// Endpoint absent in this t3 version; release it and move on.
			resp.Body.Close()
		default:
			return resp, nil
		}
	}

	if transportErr == nil {
		return nil, fmt.Errorf("no pairing endpoint accepted the request (all returned 404)")
	}
	return nil, transportErr
}
// autoPair mints a one-time pairing token for the user's instance (as that OS
// user, via the scoped sudoers entry) and exchanges it at the instance's
// /api/auth/bootstrap, relaying the returned t3_session Set-Cookie to the browser.
// user, via the scoped sudoers entry) and exchanges it at the instance's pairing
// endpoint, relaying the returned t3_session Set-Cookie to the browser.
func autoPair(e entry, w http.ResponseWriter, r *http.Request) {
// t3-mint (root, via scoped sudoers) validates the OS user is in
// /etc/ttyd-user-map, then mints as that user. The dispatch service itself
@ -133,16 +166,15 @@ func autoPair(e entry, w http.ResponseWriter, r *http.Request) {
http.Error(w, "unparseable pairing output", http.StatusInternalServerError)
return
}
body, _ := json.Marshal(map[string]string{"credential": pc.Credential})
resp, err := http.Post(fmt.Sprintf("http://127.0.0.1:%d/api/auth/bootstrap", e.Port),
"application/json", bytes.NewReader(body))
resp, err := exchangeCredential(e.Port, pc.Credential)
if err != nil {
log.Printf("pairing exchange for %s failed: %v", e.OsUser, err)
http.Error(w, "bootstrap request failed", http.StatusBadGateway)
return
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
log.Printf("bootstrap for %s returned %d", e.OsUser, resp.StatusCode)
log.Printf("pairing for %s returned %d", e.OsUser, resp.StatusCode)
http.Error(w, "bootstrap rejected", http.StatusBadGateway)
return
}

View file

@ -117,6 +117,8 @@ func fakeInstance(authenticated bool, bootstrapCalled *bool) *httptest.Server {
}
http.SetCookie(w, &http.Cookie{Name: cookieName, Value: "fresh", Path: "/"})
_, _ = w.Write([]byte(`{"authenticated":true}`))
case "/api/auth/browser-session":
http.NotFound(w, r) // models a 0.0.24 instance: the 0.0.25 endpoint is absent
default:
_, _ = w.Write([]byte("APP"))
}
@ -198,3 +200,61 @@ func TestHandlerProxiesXHREvenIfCookieInvalid(t *testing.T) {
t.Fatalf("XHR should proxy through, got %d %q", w.Code, w.Body.String())
}
}
// pairInstance starts a test server standing in for a t3 instance that offers
// pairing at exactly one path, pairPath. A request to pairPath gets a 200 with
// a cookieName cookie valued "fresh" and an {"authenticated":true} body; the
// other known pairing path, and every unrelated path, gets a 404. This models
// the 0.0.25 rename of /api/auth/bootstrap to /api/auth/browser-session.
//
// When hit is non-nil, the path of each successful pairing request is written
// to *hit so the test can check which endpoint was used. The caller must Close
// the returned server.
func pairInstance(pairPath string, hit *string) *httptest.Server {
	serve := func(w http.ResponseWriter, r *http.Request) {
		path := r.URL.Path
		isPairingPath := path == "/api/auth/browser-session" || path == "/api/auth/bootstrap"
		if !isPairingPath || path != pairPath {
			// Unknown path, or a pairing endpoint absent in this t3 version.
			http.NotFound(w, r)
			return
		}
		if hit != nil {
			*hit = path
		}
		http.SetCookie(w, &http.Cookie{Name: cookieName, Value: "fresh", Path: "/"})
		_, _ = w.Write([]byte(`{"authenticated":true}`))
	}
	return httptest.NewServer(http.HandlerFunc(serve))
}
// TestAutoPairAcrossVersions checks that a single dispatch binary pairs with an
// instance exposing only the 0.0.24 endpoint (/api/auth/bootstrap) and with one
// exposing only the 0.0.25 endpoint (/api/auth/browser-session). That is what
// lets the pin move forward, and tolerate rolling-restart skew, without a 502
// storm.
//
// Each subtest stubs mintToken with a fixed credential, points the dispatch
// table at a pairInstance serving one path, sends a cookie-less document
// request through handler, and expects a 302, the right endpoint hit, and the
// "fresh" cookie relayed.
func TestAutoPairAcrossVersions(t *testing.T) {
	savedMint := mintToken
	defer func() { mintToken = savedMint }()
	mintToken = func(string) ([]byte, error) {
		return []byte(`{"credential":"tok"}`), nil
	}

	cases := []struct {
		name     string
		pairPath string
	}{
		{name: "0.0.25 browser-session", pairPath: "/api/auth/browser-session"},
		{name: "0.0.24 bootstrap", pairPath: "/api/auth/bootstrap"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var hitPath string
			instance := pairInstance(tc.pairPath, &hitPath)
			defer instance.Close()
			setTable(portOf(t, instance))

			req := httptest.NewRequest("GET", "/", nil)
			// No session cookie on the request, so the handler must autoPair.
			req.Header.Set("X-authentik-username", "vbarzin@gmail.com")
			rec := httptest.NewRecorder()

			handler(rec, req)

			if rec.Code != http.StatusFound {
				t.Fatalf("want 302 re-pair, got %d body=%q", rec.Code, rec.Body.String())
			}
			if hitPath != tc.pairPath {
				t.Fatalf("want pairing via %s, hit=%q", tc.pairPath, hitPath)
			}
			cookies := rec.Result().Cookies()
			if len(cookies) == 0 || cookies[0].Value != "fresh" {
				t.Fatalf("want fresh t3_session relayed, got %+v", cookies)
			}
		})
	}
}