homelab vault: self-default VAULT_ADDR + prefer scoped token over ~/.vault-token

While setting up emo's Bitwarden access via `homelab vault`, his one-time
`homelab vault setup` failed with an opaque "exit status 2". The cause was
two latent CLI bugs, both of which any non-admin AFK invocation can hit:

1. The CLI set VAULT_TOKEN but never VAULT_ADDR, relying on the ambient
   value. It IS in /etc/environment (login shells), but emo runs his
   agents from long-lived tmux / non-login shells that never sourced it,
   so every `vault` child hit the 127.0.0.1:8200 default -> connection
   refused. claude-auth-sync already self-defaults VAULT_ADDR; the CLI
   now does the same.

2. Token precedence was env > ~/.vault-token > scoped. A power-user who
   ran `vault login -method=oidc` carries a read-only ~/.vault-token
   (policy `default`, capability `deny` on their workstation path), which
   shadowed the purpose-built scoped token -> 403 permission denied on
   the user's OWN path. This tool only ever touches
   secret/workstation/claude-users/<user>, which the scoped token covers
   exactly, so precedence is now env > scoped > ~/.vault-token. Verified
   the scoped tokens for both emo and wizard hold create/read/update on
   their own paths, so admins are unaffected.

Also stop swallowing the shelled `vault`/`bw` stderr: errors now carry
the real message (connection refused / permission denied) instead of a
bare "exit status N" — without that, (1) and (2) were indistinguishable.

Verified end-to-end as emo (VAULT_ADDR unset + his read-only
~/.vault-token present): writeCreds now succeeds.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-28 09:04:28 +00:00
parent c70810a51b
commit 0525f0b12d
3 changed files with 199 additions and 23 deletions

View file

@ -4,6 +4,7 @@ import (
"bufio"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
@ -79,7 +80,33 @@ func realRunner(name string, argv, envv []string) (string, error) {
out, err := cmd.Output()
// Trim only the trailing newline the tool appends — NOT all whitespace, so a
// fetched secret with significant leading/trailing spaces is preserved.
return strings.TrimRight(string(out), "\r\n"), err
return strings.TrimRight(string(out), "\r\n"), augmentErr(err, exitStderr(err))
}
// exitStderr extracts the stderr bytes that cmd.Output() attaches to an
// *exec.ExitError when a command fails. It returns nil when err is nil or does
// not wrap an *exec.ExitError. The tools we shell out to (vault, bw) print the
// actionable message there — "connection refused", "permission denied" — and
// without it the caller only ever sees a bare "exit status N".
func exitStderr(err error) []byte {
	var exitErr *exec.ExitError
	if !errors.As(err, &exitErr) {
		return nil
	}
	return exitErr.Stderr
}
// augmentErr makes a failed exec diagnosable by appending the captured stderr
// to err, so the message reads "<err>: <stderr>" rather than just
// "exit status 2".
//
// A nil err yields nil regardless of stderr. When stderr is empty or only
// whitespace, err is returned unchanged. Otherwise the result wraps err with
// %w, so errors.Is/As still see the original error.
func augmentErr(err error, stderr []byte) error {
	if err == nil {
		return nil
	}
	detail := strings.TrimSpace(string(stderr))
	if detail == "" {
		return err
	}
	return fmt.Errorf("%w: %s", err, detail)
}
// realRunnerStdin runs a command feeding `stdin` to it, for secret values that
@ -92,7 +119,7 @@ func realRunnerStdin(name string, argv, envv []string, stdin string) (string, er
}
cmd.Stdin = strings.NewReader(stdin)
out, err := cmd.Output()
return strings.TrimRight(string(out), "\r\n"), err
return strings.TrimRight(string(out), "\r\n"), augmentErr(err, exitStderr(err))
}
// vwCredsPath returns vwUserPathPrefix with user appended.
func vwCredsPath(user string) string {
	return vwUserPathPrefix + user
}
@ -135,26 +162,58 @@ func scopedTokenPath(home string) string {
}
// vaultTokenSource decides which Vault token the `vault` child processes should
// use. Precedence, highest first:
//
//  1. an explicit $VAULT_TOKEN (deliberate override);
//  2. the per-user scoped token claude-auth-sync maintains at
//     scopedTokenPath(HOME) (policy workstation-claude-<user>, which grants
//     exactly the create/read/update this tool needs on the user's own path);
//  3. a native ~/.vault-token.
//
// The scoped token MUST beat ~/.vault-token: this tool only ever touches the
// caller's own secret/workstation/claude-users/<user> path, and a power-user who
// ran `vault login -method=oidc` carries a read-only ~/.vault-token whose
// capability on that path is `deny` — letting it win shadows the scoped token
// and every op fails 403/deny (emo, 2026-06-28). ~/.vault-token is only the
// right credential when there is no scoped token (admins).
//
// envToken is the caller's $VAULT_TOKEN, haveVaultTokenFile reports whether a
// non-empty ~/.vault-token exists, and scopedToken is the raw content of the
// scoped token file; surrounding whitespace is ignored, so a whitespace-only
// file counts as no scoped token.
//
// It returns the token to export — "" when the vault CLI should read the
// ambient/native credential — plus a source tag ("env", "scoped", "file" or
// "none") for tests/logging.
func vaultTokenSource(envToken string, haveVaultTokenFile bool, scopedToken string) (token, source string) {
	// Trim once so the emptiness check and the returned value always agree.
	scoped := strings.TrimSpace(scopedToken)
	switch {
	case envToken != "":
		return "", "env"
	case scoped != "":
		return scoped, "scoped"
	case haveVaultTokenFile:
		return "", "file"
	default:
		return "", "none"
	}
}
// vaultAddrDefault is the address of the cluster Vault the workstation talks
// to. The bw server is likewise hardcoded (openSession), so hardcoding a sane
// default here is consistent.
const vaultAddrDefault = "https://vault.viktorbarzin.me"

// vaultAddrToSet reports which VAULT_ADDR, if any, must be exported. It returns
// vaultAddrDefault when envAddr is empty or whitespace-only, and "" when the
// caller's environment already carries an address.
//
// homelab vault is invoked by AFK agent sessions — frequently non-login shells
// (tmux panes, agent subprocesses) that never sourced /etc/environment — so,
// like claude-auth-sync, the CLI must NOT depend on an ambient VAULT_ADDR;
// otherwise every `vault` child falls back to the 127.0.0.1:8200 default and
// fails "connection refused" (exit 2).
func vaultAddrToSet(envAddr string) string {
	if strings.TrimSpace(envAddr) != "" {
		// The caller already chose an address: nothing to export.
		return ""
	}
	return vaultAddrDefault
}
// ensureVaultAddr sets VAULT_ADDR in this process's environment to the value
// vaultAddrToSet picks when the current one is missing, so the vault child
// processes reach the cluster Vault regardless of the caller's shell. An
// explicit VAULT_ADDR (admins, CI) is left untouched.
func ensureVaultAddr() {
	addr := vaultAddrToSet(os.Getenv("VAULT_ADDR"))
	if addr == "" {
		// An address is already present; keep the caller's choice.
		return
	}
	os.Setenv("VAULT_ADDR", addr)
}
// fileNonEmpty reports whether path exists and has content.
func fileNonEmpty(path string) bool {
fi, err := os.Stat(path)
@ -167,6 +226,10 @@ func fileNonEmpty(path string) bool {
// is idempotent and safe for admins, whose explicit $VAULT_TOKEN / ~/.vault-token
// take precedence and are left untouched.
func ensureVaultToken() {
// Every vault verb funnels through here, so this is the one place that also
// guarantees VAULT_ADDR is set (see vaultAddrToSet for why it can't be
// assumed from the caller's shell).
ensureVaultAddr()
home := os.Getenv("HOME")
scoped, _ := os.ReadFile(scopedTokenPath(home))
tok, src := vaultTokenSource(os.Getenv("VAULT_TOKEN"), home != "" && fileNonEmpty(home+"/.vault-token"), string(scoped))

View file

@ -2,6 +2,7 @@ package main
import (
"encoding/base64"
"errors"
"fmt"
"os"
"reflect"
@ -269,6 +270,29 @@ func TestEnsureVaultTokenKeepsExplicitEnv(t *testing.T) {
}
}
// TestEnsureVaultTokenPrefersScopedOverFile is a regression guard: with both a
// scoped token and a ~/.vault-token on disk and no $VAULT_TOKEN, the scoped
// token must be the one exported. A power-user's read-only OIDC ~/.vault-token
// must NOT shadow it (emo's setup hit 403 because it did, 2026-06-28).
func TestEnsureVaultTokenPrefersScopedOverFile(t *testing.T) {
	home := t.TempDir()
	scopedDir := home + "/.config/claude-auth-sync"
	if err := os.MkdirAll(scopedDir, 0o700); err != nil {
		t.Fatal(err)
	}

	// Scoped token first, then the competing native token file.
	fixtures := []struct{ path, content string }{
		{scopedDir + "/vault-token", "SCOPED-TOK"},
		{home + "/.vault-token", "STALE-OIDC-TOK"},
	}
	for _, f := range fixtures {
		if err := os.WriteFile(f.path, []byte(f.content), 0o600); err != nil {
			t.Fatal(err)
		}
	}

	t.Setenv("HOME", home)
	t.Setenv("VAULT_TOKEN", "")
	ensureVaultToken()

	got := os.Getenv("VAULT_TOKEN")
	if got == "SCOPED-TOK" {
		return
	}
	t.Fatalf("VAULT_TOKEN = %q, want the scoped token to win over a stale ~/.vault-token", got)
}
func TestScopedTokenPath(t *testing.T) {
if got := scopedTokenPath("/home/emo"); got != "/home/emo/.config/claude-auth-sync/vault-token" {
t.Fatalf("scopedTokenPath = %q", got)
@ -276,9 +300,10 @@ func TestScopedTokenPath(t *testing.T) {
}
func TestVaultTokenSource(t *testing.T) {
// Precedence: explicit $VAULT_TOKEN > ~/.vault-token (vault CLI native) >
// the claude-auth-sync per-user scoped token. This is what lets a non-admin
// workstation user (no ambient token) reach their own Vault path.
// Precedence: explicit $VAULT_TOKEN > the claude-auth-sync per-user scoped
// token > a native ~/.vault-token. Scoped beats the file so a power-user's
// read-only OIDC ~/.vault-token can't shadow the scoped token on the user's
// own path (emo, 2026-06-28).
cases := []struct {
name string
env string
@ -287,10 +312,11 @@ func TestVaultTokenSource(t *testing.T) {
wantTok, wantSrc string
}{
{"explicit env wins", "abc", true, "S", "", "env"},
{"vault-token file used natively", "", true, "S", "", "file"},
{"scoped fallback for non-admin", "", false, "S-TOK", "S-TOK", "scoped"},
{"scoped beats a stale ~/.vault-token", "", true, "S-TOK", "S-TOK", "scoped"},
{"scoped used when no file", "", false, "S-TOK", "S-TOK", "scoped"},
{"native ~/.vault-token only when no scoped", "", true, "", "", "file"},
{"scoped value is trimmed", "", false, " S-TOK\n", "S-TOK", "scoped"},
{"whitespace-only scoped is no token", "", false, " \n", "", "none"},
{"whitespace-only scoped falls back to file", "", true, " \n", "", "file"},
{"nothing configured", "", false, "", "", "none"},
}
for _, c := range cases {
@ -302,6 +328,66 @@ func TestVaultTokenSource(t *testing.T) {
}
}
// TestVaultAddrToSet pins the VAULT_ADDR defaulting rule. homelab vault is
// invoked by AFK agent sessions (non-login shells that never sourced
// /etc/environment), so the CLI must self-default VAULT_ADDR rather than rely on
// the ambient env — else every `vault` child hits the 127.0.0.1:8200 default and
// fails "connection refused" (exit 2).
func TestVaultAddrToSet(t *testing.T) {
	type addrCase struct {
		name string
		env  string
		want string
	}
	table := []addrCase{
		{name: "unset -> default", env: "", want: vaultAddrDefault},
		{name: "whitespace-only -> default", env: " \n", want: vaultAddrDefault},
		{name: "explicit kept (empty = leave alone)", env: "https://vault.example.com", want: ""},
	}
	for i := range table {
		tc := table[i]
		got := vaultAddrToSet(tc.env)
		if got == tc.want {
			continue
		}
		t.Errorf("%s: vaultAddrToSet(%q) = %q, want %q", tc.name, tc.env, got, tc.want)
	}
}
// TestEnsureVaultTokenSetsDefaultAddr checks that ensureVaultToken exports the
// default VAULT_ADDR when the environment carries none (emo's non-login-shell
// situation).
func TestEnsureVaultTokenSetsDefaultAddr(t *testing.T) {
	// A fresh temp HOME holds neither a scoped token nor a ~/.vault-token.
	env := [][2]string{
		{"HOME", t.TempDir()},
		{"VAULT_TOKEN", ""},
		{"VAULT_ADDR", ""},
	}
	for _, kv := range env {
		t.Setenv(kv[0], kv[1])
	}

	ensureVaultToken()

	got := os.Getenv("VAULT_ADDR")
	if got == vaultAddrDefault {
		return
	}
	t.Fatalf("VAULT_ADDR = %q, want default %q to be exported", got, vaultAddrDefault)
}
// TestEnsureVaultTokenKeepsExplicitAddr checks that ensureVaultToken leaves a
// VAULT_ADDR already present in the environment exactly as it found it.
func TestEnsureVaultTokenKeepsExplicitAddr(t *testing.T) {
	env := [][2]string{
		{"HOME", t.TempDir()},
		{"VAULT_TOKEN", ""},
		{"VAULT_ADDR", "https://vault.example.com"},
	}
	for _, kv := range env {
		t.Setenv(kv[0], kv[1])
	}

	ensureVaultToken()

	got := os.Getenv("VAULT_ADDR")
	if got == "https://vault.example.com" {
		return
	}
	t.Fatalf("VAULT_ADDR = %q, must not override an explicit addr", got)
}
// TestAugmentErrSurfacesStderr covers the augmentErr contract: nil stays nil,
// non-blank stderr is appended while the original error stays reachable through
// errors.Is, and blank stderr returns the original error untouched.
func TestAugmentErrSurfacesStderr(t *testing.T) {
	// A nil error stays nil no matter what stderr holds.
	if got := augmentErr(nil, []byte("ignored")); got != nil {
		t.Fatalf("augmentErr(nil, …) = %v, want nil", got)
	}

	base := errors.New("exit status 2")

	// Both the stderr text and the original message must appear in the result.
	wrapped := augmentErr(base, []byte(" dial tcp 127.0.0.1:8200: connect: connection refused\n"))
	surfaced := wrapped != nil &&
		strings.Contains(wrapped.Error(), "connection refused") &&
		strings.Contains(wrapped.Error(), "exit status 2")
	if !surfaced {
		t.Fatalf("augmentErr did not surface stderr: %v", wrapped)
	}
	if !errors.Is(wrapped, base) {
		t.Fatal("augmentErr lost the wrapped error (errors.Is failed)")
	}

	// Whitespace-only stderr adds nothing, so the very same error comes back.
	if unchanged := augmentErr(base, []byte(" ")); unchanged != base {
		t.Fatalf("augmentErr with blank stderr = %v, want the original error unchanged", unchanged)
	}
}
func TestKvWriteVerb(t *testing.T) {
// merge=true → read-modify-write patch (needs only read+update, NOT the
// `patch` capability the scoped workstation policy lacks).