workstation: roster-driven provisioner (SSoT reconcile, additive-only)
All checks were successful
ci/woodpecker/push/default Pipeline was successful
ci/woodpecker/push/build-cli Pipeline was successful

t3-provision-users.sh now consumes roster_engine.py: derives accounts + per-tier groups + sticky ports + /etc/ttyd-user-map + dispatch.json from roster.yaml and applies them. ADDITIVE-ONLY for existing users (never strips a group, replaces a home, or re-locks an account) so the hourly timer is always safe. Best-effort tier validation vs live k8s_users: warns on a net-new absent user (emo), aborts only on a real tier conflict, skips when root has no Vault token. DRY_RUN mode for safe testing. Verified on the live host: reproduces dispatch.json content exactly, emo/anca groups + all t3-serve instances unchanged, idempotent, shellcheck-clean; deployed to /usr/local/bin (hourly timer target).

Engine: validate_tiers now returns ValidationIssue(severity) — error=conflict (abort) vs warn=absent (grant pending) — + has_blocking_errors(); 28 pytest cases. setup-devvm.sh redeploys the provisioner for reproducibility.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-06-08 14:18:12 +00:00
parent 3feb69e379
commit 2c1865eabb
4 changed files with 159 additions and 64 deletions

View file

@ -117,26 +117,49 @@ def load_roster_file(path: str) -> Roster:
# --------------------------------------------------------------------------
def validate_tiers(roster: Roster, k8s_user_tiers: dict[str, str]) -> list[str]:
"""Return one error string per roster user whose tier disagrees with the
live `k8s_users` map. Admins are exempt (cluster-admin is granted out of
band). An empty list means the roster is consistent with the cluster."""
errors = []
@dataclass(frozen=True)
class ValidationIssue:
os_user: str
severity: str # "error" = tier conflict (abort) | "warn" = absent (grant pending)
message: str
def validate_tiers(
roster: Roster, k8s_user_tiers: dict[str, str]
) -> list[ValidationIssue]:
"""Compare each roster user's tier against the live `k8s_users` map. A real
conflict (roster tier != cluster tier) is an "error" (abort). A net-new user
not yet in `k8s_users` is a "warn" (onboarding proceeds; the kubectl grant is
pending). Admins are exempt (cluster-admin is granted out of band). An empty
list means the roster is consistent with the cluster."""
issues = []
for user in roster.users.values():
if user.tier == "admin":
continue
actual = k8s_user_tiers.get(user.k8s_user)
if actual is None:
errors.append(
f"{user.os_user}: tier {user.tier} but k8s_user {user.k8s_user!r} "
f"absent from k8s_users (add the entry first)"
issues.append(
ValidationIssue(
user.os_user,
"warn",
f"{user.os_user}: tier {user.tier} but k8s_user {user.k8s_user!r} "
f"absent from k8s_users (kubectl grant pending — add the entry)",
)
)
elif actual != user.tier:
errors.append(
f"{user.os_user}: roster tier {user.tier} != k8s_users tier "
f"{actual} for {user.k8s_user!r}"
issues.append(
ValidationIssue(
user.os_user,
"error",
f"{user.os_user}: roster tier {user.tier} != k8s_users tier "
f"{actual} for {user.k8s_user!r}",
)
)
return errors
return issues
def has_blocking_errors(issues: list[ValidationIssue]) -> bool:
return any(issue.severity == "error" for issue in issues)
# --------------------------------------------------------------------------
@ -261,10 +284,10 @@ def _main(argv: list[str]) -> int:
roster = load_roster_file(args.roster)
if args.cmd == "validate":
with open(args.k8s_users_json, encoding="utf-8") as fh:
errors = validate_tiers(roster, json.load(fh))
for err in errors:
print(err, file=sys.stderr)
return 1 if errors else 0
issues = validate_tiers(roster, json.load(fh))
for issue in issues:
print(f"{issue.severity.upper()}: {issue.message}", file=sys.stderr)
return 1 if has_blocking_errors(issues) else 0
with open(args.ports_json, encoding="utf-8") as fh:
desired = derive_desired_state(roster, json.load(fh))
json.dump(_desired_state_to_dict(desired), sys.stdout, indent=2, sort_keys=True)

View file

@ -63,4 +63,9 @@ for d in skills rules agents commands; do
done
log "skel: launcher + tmux + inheritance symlinks (base=$CONFIG_BASE)"
# 6) deploy the roster-driven provisioner to /usr/local/bin (run hourly by
# t3-provision-users.timer). Re-deployed here so its logic is reproducible.
install -m 0755 "$HERE/../t3-provision-users.sh" /usr/local/bin/t3-provision-users
log "t3-provision-users -> /usr/local/bin/ (roster-driven)"
log "OK (idempotent)"

View file

@ -99,24 +99,26 @@ def test_validate_ok_when_tiers_match():
assert eng.validate_tiers(r, {"anca": "namespace-owner"}) == []
def test_validate_flags_tier_mismatch():
def test_validate_flags_tier_mismatch_as_error():
# roster says power-user, cluster says namespace-owner -> a real conflict -> ERROR (abort).
r = _roster(
"users: {ancamilea: {authentik_user: a, k8s_user: anca, tier: power-user}}"
)
errs = eng.validate_tiers(r, {"anca": "namespace-owner"})
assert len(errs) == 1
assert (
"anca" in errs[0] and "power-user" in errs[0] and "namespace-owner" in errs[0]
)
issues = eng.validate_tiers(r, {"anca": "namespace-owner"})
assert len(issues) == 1
assert issues[0].severity == "error"
assert issues[0].os_user == "ancamilea"
assert "power-user" in issues[0].message and "namespace-owner" in issues[0].message
def test_validate_flags_netnew_user_absent_from_k8s_users():
# emo is power-user in the roster but has no k8s_users entry yet -> the OIDC
# RBAC binding can't exist, so this must fail loud (add the entry first).
def test_validate_flags_netnew_absent_as_warn():
# emo is power-user in the roster but has no k8s_users entry yet. Onboarding the
# workstation should still proceed; the kubectl grant is pending -> WARN, not error.
r = _roster("users: {emo: {authentik_user: e, k8s_user: emo, tier: power-user}}")
errs = eng.validate_tiers(r, {})
assert len(errs) == 1
assert "emo" in errs[0] and "k8s_users" in errs[0]
issues = eng.validate_tiers(r, {})
assert len(issues) == 1
assert issues[0].severity == "warn"
assert "emo" in issues[0].message and "k8s_users" in issues[0].message
def test_validate_skips_admin_tier():
@ -127,6 +129,22 @@ def test_validate_skips_admin_tier():
assert eng.validate_tiers(r, {}) == []
def test_has_blocking_errors_distinguishes_mismatch_from_absent():
mismatch = _roster(
"users: {ancamilea: {authentik_user: a, k8s_user: anca, tier: power-user}}"
)
absent = _roster(
"users: {emo: {authentik_user: e, k8s_user: emo, tier: power-user}}"
)
assert (
eng.has_blocking_errors(
eng.validate_tiers(mismatch, {"anca": "namespace-owner"})
)
is True
)
assert eng.has_blocking_errors(eng.validate_tiers(absent, {})) is False
# --------------------------------------------------------------------------
# derive_desired_state: accounts, sticky ports, ttyd map, dispatch (module #1)
# --------------------------------------------------------------------------