diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index b5383183..1e8d74a9 100755 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -72,7 +72,7 @@ - `.woodpecker/build-fallback.yml` — Old full build pipeline preserved (event: `deployment` — never auto-fires) **Woodpecker API**: Uses **numeric repo IDs** (`/api/repos/2/pipelines`), NOT owner/name paths (those return HTML). -Repo IDs: Website=2, travel_blog=5, health=4, audiblez-web=9, f1-stream=10, plotting-book=43, claude-memory-mcp=78, infra-onboarding=79 +Repo IDs: infra=1, Website=2, finance=3, health=4, travel_blog=5, webhook-handler=6, audiblez-web=9, f1-stream=10, plotting-book=43, claude-memory-mcp=78, infra-onboarding=79 **Woodpecker YAML gotchas**: - Commands with `${VAR}:${VAR}` must be **quoted** — unquoted `:` triggers YAML map parsing when vars are empty diff --git a/.claude/skills/add-user/SKILL.md b/.claude/skills/add-user/SKILL.md index 212344c6..bc03cc7a 100644 --- a/.claude/skills/add-user/SKILL.md +++ b/.claude/skills/add-user/SKILL.md @@ -10,11 +10,51 @@ description: | # Add User -Add a new namespace-owner to the cluster. No code changes needed — only Vault KV update + stack applies. +Add a new namespace-owner to the cluster. Two modes: **automated** (preferred) and **manual** (fallback). SOPS state encryption access is **automatically provisioned** by the vault stack — per-stack Transit keys, policies, identity groups, and group aliases are all created from the `k8s_users` map. No manual SOPS setup required. -## Workflow +## Automated Flow (Preferred) + +**Admin creates an Authentik invite → user signs up → provisioning happens automatically.** + +### Steps + +1. **Create Authentik Invitation** + - Go to [Authentik Admin](https://authentik.viktorbarzin.me/if/admin/#/core/invitations) + - Create a new invitation + - Pre-assign the user to the **`kubernetes-namespace-owners`** group + - Copy the invite link + +2. **Send Invite Link to User** + - The user clicks the link and signs up + +3. 
**Automatic Provisioning** + - Authentik fires a webhook to `webhook.viktorbarzin.me/authentik/provision` + - The webhook handler validates the event and triggers the Woodpecker `provision-user` pipeline + - Pipeline automatically: + - Adds user to Vault KV (`secret/platform` → `k8s_users`) with convention defaults + - Creates `sops-USERNAME` group in Authentik and assigns the user + - Applies stacks: vault → rbac → cloudflared → woodpecker + - Commits encrypted state and pushes + - Sends Slack notification + +4. **Convention Defaults** (applied automatically) + - Namespace: `username` + - Quota: CPU 2, Memory 4Gi requests / 8Gi limits, 20 pods + - Domains: none (user can request later) + +5. **Post-Provisioning** + - Send user the onboarding link: `https://k8s-portal.viktorbarzin.me/onboarding?role=namespace-owner` + - If custom quota/domains needed, update Vault KV manually and re-apply stacks + +### Monitoring the Pipeline + +Watch the pipeline at: `https://ci.viktorbarzin.me` → infra repo → provision-user pipeline + +## Manual Flow (Fallback) + +Use when automated flow isn't available or custom configuration is needed. ### Step 1: Collect Information @@ -98,20 +138,7 @@ cd stacks/woodpecker && ../../scripts/tg apply --non-interactive cd ../.. ``` -### Step 4: Create Per-Stack Encrypted State - -For each of the user's namespaces, ensure the Transit key is used for state encryption. New stacks created for the user will automatically use per-stack keys via `scripts/state-sync`. 
-
-If the user's stack already has state, re-encrypt it with the new per-stack key:
-```bash
-# Force re-encrypt (delete old .enc, state-sync will use per-stack Transit key)
-rm state/stacks/NAMESPACE/terraform.tfstate.enc
-scripts/state-sync encrypt NAMESPACE
-git add state/stacks/NAMESPACE/terraform.tfstate.enc
-git commit -m "state(NAMESPACE): re-encrypt with per-stack Transit key"
-```
-
-### Step 5: Verify
+### Step 4: Verify
 
 ```bash
 # Namespace exists
@@ -135,12 +162,9 @@ vault write kubernetes/creds/NAMESPACE-deployer kubernetes_namespace=NAMESPACE
 
 # SOPS Transit key exists
 vault read transit/keys/sops-state-NAMESPACE
-
-# DNS record (if domains specified)
-dig DOMAIN.viktorbarzin.me
 ```
 
-### Step 6: Notify User
+### Step 5: Notify User
 
 Tell the user to share these onboarding instructions with the new user:
 - K8s Portal: `https://k8s-portal.viktorbarzin.me/onboarding?role=namespace-owner`
@@ -171,7 +195,7 @@ scripts/state-sync decrypt NAMESPACE # decrypts only their stack
 | Cloudflare DNS records | cloudflared | `domains` list |
 | Woodpecker admin access | woodpecker | user key |
 
-## Checklist
+## Checklist (Manual Flow)
 
 - [ ] Authentik: user added to `kubernetes-namespace-owners` group
 - [ ] Authentik: user added to `sops-USERNAME` group (for SOPS state decrypt)
diff --git a/.woodpecker/provision-user.yml b/.woodpecker/provision-user.yml
new file mode 100644
index 00000000..6dddf719
--- /dev/null
+++ b/.woodpecker/provision-user.yml
@@ -0,0 +1,264 @@
+when:
+  event: manual
+
+clone:
+  git:
+    image: woodpeckerci/plugin-git
+    settings:
+      attempts: 5
+      backoff: 10s
+
+steps:
+  - name: validate-inputs
+    image: alpine
+    commands:
+      - |
+        # Read the trigger variables at runtime ($VAR, no braces). Woodpecker
+        # substitutes the braced form into the script text before the shell
+        # runs, so a crafted value could inject shell code ahead of validation.
+        # NOTE(review): confirm this Woodpecker version exposes manual-trigger
+        # variables under the CI_PIPELINE_VARIABLE_ prefix.
+        USERNAME_IN="$CI_PIPELINE_VARIABLE_USERNAME"
+        EMAIL_IN="$CI_PIPELINE_VARIABLE_EMAIL"
+        if [ -z "$USERNAME_IN" ] || [ -z "$EMAIL_IN" ]; then
+          echo "ERROR: USERNAME and EMAIL variables are required"
+          echo "Trigger with: POST /api/repos/{id}/pipelines {branch:master, variables:{USERNAME:x, EMAIL:y}}"
+          exit 1
+        fi
+        # The username is used as the namespace name and is sourced by later
+        # steps, so accept only a DNS-1123 label (lowercase letters, digits, '-').
+        case "$USERNAME_IN" in
+          *[!a-z0-9-]* | -* | *-)
+            echo "ERROR: USERNAME must be a DNS-1123 label (lowercase letters, digits, '-')"
+            exit 1
+            ;;
+        esac
+        if [ "$(printf '%s' "$USERNAME_IN" | wc -c)" -gt 63 ]; then
+          echo "ERROR: USERNAME must be at most 63 characters"
+          exit 1
+        fi
+        case "$EMAIL_IN" in
+          *[!A-Za-z0-9._%+@-]* | *@*@*)
+            echo "ERROR: EMAIL contains unsupported characters"
+            exit 1
+            ;;
+          ?*@?*.?*) ;;
+          *)
+            echo "ERROR: EMAIL is not a valid address"
+            exit 1
+            ;;
+        esac
+        echo "Provisioning user: $USERNAME_IN ($EMAIL_IN)"
+        # Write vars to shared file for subsequent steps. Single quotes are safe
+        # because the validated character sets above cannot contain a quote.
+        echo "export PROVISION_USERNAME='$USERNAME_IN'" > .provision-env
+        echo "export PROVISION_EMAIL='$EMAIL_IN'" >> .provision-env
+
+  - name: prepare
+    image: alpine
+    commands:
+      - "apk update && apk add jq curl git git-crypt"
+      # git-crypt for secrets/ directory
+      - |
+        curl -k https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key \
+          -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \
+          | jq -r .data.key | base64 -d > /tmp/key
+      - "git-crypt unlock /tmp/key && rm /tmp/key"
+      # Vault: authenticate via K8s service account JWT
+      - |
+        SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
+        VAULT_TOKEN=$(curl -s -X POST http://vault-active.vault.svc.cluster.local:8200/v1/auth/kubernetes/login \
+          -d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r .auth.client_token)
+        # Stop here if login failed; otherwise later steps would call Vault
+        # with the literal token "null".
+        if [ -z "$VAULT_TOKEN" ] || [ "$VAULT_TOKEN" = "null" ]; then
+          echo "ERROR: Vault Kubernetes login failed"
+          exit 1
+        fi
+        echo "export VAULT_TOKEN=$VAULT_TOKEN" > .vault-env
+        echo "export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200" >> .vault-env
+
+  - name: update-vault-kv
+    image: alpine
+    commands:
+      - "apk update && apk add jq curl"
+      - "source .provision-env && source .vault-env"
+      # Read current platform secret
+      - |
+        source .provision-env && source .vault-env
+        # -f makes curl fail on an HTTP error instead of returning the error body
+        RESPONSE=$(curl -sf -H "X-Vault-Token: $VAULT_TOKEN" \
+          "$VAULT_ADDR/v1/secret/data/platform")
+        CURRENT=$(echo "$RESPONSE" | jq '.data.data')
+        VERSION=$(echo "$RESPONSE" | jq -r '.data.metadata.version')
+
+        # The write below replaces the whole secret, so refuse to continue
+        # unless the read returned a real object; a null read would otherwise
+        # wipe every key except k8s_users.
+        if ! echo "$CURRENT" | jq -e 'type == "object"' >/dev/null 2>&1; then
+          echo "ERROR: could not read secret/platform from Vault"
+          exit 1
+        fi
+
+        # Parse current k8s_users (stored as JSON string)
+        CURRENT_USERS=$(echo "$CURRENT" | jq -r '.k8s_users')
+
+        # Check if user already exists
+        if echo "$CURRENT_USERS" | jq -e --arg u "$PROVISION_USERNAME" '.[$u]' >/dev/null 2>&1; then
+          echo "User $PROVISION_USERNAME already exists in k8s_users — skipping Vault KV update"
+          exit 0
+        fi
+
+        # Add new user with convention defaults
+        UPDATED_USERS=$(echo "$CURRENT_USERS" | jq --arg u "$PROVISION_USERNAME" --arg e "$PROVISION_EMAIL" \
+          '. + {($u): {"role":"namespace-owner","email":$e,"namespaces":[$u],"domains":[],"quota":{"cpu_requests":"2","memory_requests":"4Gi","memory_limits":"8Gi","pods":"20"}}}')
+
+        # Write back full platform secret with updated k8s_users (as JSON string).
+        # cas = the version read above: Vault rejects the write if the secret
+        # changed in between, so a concurrent update is never overwritten.
+        PAYLOAD=$(echo "$CURRENT" | jq --arg users "$UPDATED_USERS" --argjson cas "$VERSION" \
+          '{options: {cas: $cas}, data: (.k8s_users = $users)}')
+
+        RESULT=$(curl -sf -X POST -H "X-Vault-Token: $VAULT_TOKEN" \
+          "$VAULT_ADDR/v1/secret/data/platform" \
+          -d "$PAYLOAD")
+        echo "$RESULT" | jq .
+
+        echo "Added $PROVISION_USERNAME to k8s_users in Vault"
+
+  - name: create-authentik-groups
+    image: alpine
+    commands:
+      - "apk update && apk add jq curl"
+      - |
+        source .provision-env && source .vault-env
+
+        # Get Authentik API token from Vault
+        AUTHENTIK_TOKEN=$(curl -s -H "X-Vault-Token: $VAULT_TOKEN" \
+          "$VAULT_ADDR/v1/secret/data/viktor" | jq -r '.data.data.authentik_api_token')
+        AUTHENTIK_URL="https://authentik.viktorbarzin.me"
+
+        # Create sops-USERNAME group if it doesn't exist
+        SOPS_GROUP="sops-$PROVISION_USERNAME"
+        EXISTING=$(curl -s -H "Authorization: Bearer $AUTHENTIK_TOKEN" \
+          "$AUTHENTIK_URL/api/v3/core/groups/?name=$SOPS_GROUP" | jq -r '.results | length')
+
+        if [ "$EXISTING" = "0" ]; then
+          GROUP_PK=$(curl -s -X POST -H "Authorization: Bearer $AUTHENTIK_TOKEN" \
+            -H "Content-Type: application/json" \
+            "$AUTHENTIK_URL/api/v3/core/groups/" \
+            -d "{\"name\": \"$SOPS_GROUP\", \"is_superuser\": false}" | jq -r '.pk')
+          echo "Created Authentik group $SOPS_GROUP (pk=$GROUP_PK)"
+        else
+          GROUP_PK=$(curl -s -H "Authorization: Bearer $AUTHENTIK_TOKEN" \
+            "$AUTHENTIK_URL/api/v3/core/groups/?name=$SOPS_GROUP" | jq -r '.results[0].pk')
+          echo "Authentik group $SOPS_GROUP already exists (pk=$GROUP_PK)"
+        fi
+
+        # A failed create or lookup yields "null"; stop before patching /groups/null/
+        if [ -z "$GROUP_PK" ] || [ "$GROUP_PK" = "null" ]; then
+          echo "ERROR: could not resolve Authentik group $SOPS_GROUP"
+          exit 1
+        fi
+
+        # Find the user by username
+        USER_PK=$(curl -s -H "Authorization: Bearer $AUTHENTIK_TOKEN" \
+          "$AUTHENTIK_URL/api/v3/core/users/?username=$PROVISION_USERNAME" | jq -r '.results[0].pk')
+
+        if [ "$USER_PK" = "null" ] || [ -z "$USER_PK" ]; then
+          echo "WARNING: User $PROVISION_USERNAME not found in Authentik — group assignment skipped"
+          echo "The user may not have signed up yet. Groups will need manual assignment."
+          exit 0
+        fi
+
+        # Add user to sops group
+        CURRENT_MEMBERS=$(curl -s -H "Authorization: Bearer $AUTHENTIK_TOKEN" \
+          "$AUTHENTIK_URL/api/v3/core/groups/$GROUP_PK/" | jq -r '.users')
+        UPDATED_MEMBERS=$(echo "$CURRENT_MEMBERS" | jq --argjson uid "$USER_PK" '. + [$uid] | unique')
+
+        curl -s -X PATCH -H "Authorization: Bearer $AUTHENTIK_TOKEN" \
+          -H "Content-Type: application/json" \
+          "$AUTHENTIK_URL/api/v3/core/groups/$GROUP_PK/" \
+          -d "{\"users\": $UPDATED_MEMBERS}" | jq .
+
+        echo "Added user $PROVISION_USERNAME (pk=$USER_PK) to group $SOPS_GROUP"
+
+  - name: terragrunt-apply
+    image: alpine
+    backend_options:
+      kubernetes:
+        resources:
+          requests:
+            memory: 3Gi
+          limits:
+            memory: 6Gi
+    commands:
+      - "apk update && apk add curl unzip git openssh-client python3 py3-pip py3-yaml"
+      # Install sops
+      - "wget -qO /usr/local/bin/sops https://github.com/getsops/sops/releases/download/v3.9.4/sops-v3.9.4.linux.amd64"
+      - "chmod 755 /usr/local/bin/sops"
+      # Install Terraform
+      - "wget -qO /tmp/terraform.zip https://releases.hashicorp.com/terraform/1.5.7/terraform_1.5.7_linux_amd64.zip"
+      - "unzip -o /tmp/terraform.zip -d /usr/local/bin/ && chmod 755 /usr/local/bin/terraform"
+      # Install Terragrunt
+      - "wget -qO /usr/local/bin/terragrunt https://github.com/gruntwork-io/terragrunt/releases/download/v0.99.4/terragrunt_linux_amd64"
+      - "chmod 755 /usr/local/bin/terragrunt"
+      # Source Vault token
+      - "source .vault-env"
+      # Apply stacks sequentially: vault → rbac → cloudflared → woodpecker
+      - |
+        source .vault-env
+        export VAULT_ADDR
+        export VAULT_TOKEN
+        for stack in vault rbac cloudflared woodpecker; do
+          echo "=== Applying stack: $stack ==="
+          cd "stacks/$stack"
+          # Decrypt state
+          ../../scripts/state-sync decrypt "$stack" || true
+          # Apply. Capture the exit code instead of aborting immediately so the
+          # state is still encrypted below: commit-and-push also runs on
+          # failure and can only persist state that has been encrypted.
+          rc=0
+          terragrunt apply --non-interactive -auto-approve -backup=- || rc=$?
+          # Encrypt state (best-effort, but no longer silent on failure)
+          ../../scripts/state-sync encrypt "$stack" \
+            || echo "WARNING: state-sync encrypt failed for $stack; committed state may be stale"
+          cd ../..
+          if [ "$rc" -ne 0 ]; then
+            echo "=== Failed: $stack (exit $rc) ==="
+            exit "$rc"
+          fi
+          echo "=== Done: $stack ==="
+        done
+
+  - name: commit-and-push
+    image: alpine
+    commands:
+      - "apk update && apk add openssh-client git git-crypt"
+      - "mkdir -p ~/.ssh && ssh-keyscan -H github.com >> ~/.ssh/known_hosts"
+      - "chmod 400 secrets/deploy_key"
+      - |
+        . .provision-env
+        # Only add state files
+        git add state/ || true
+        git remote set-url origin git@github.com:ViktorBarzin/infra.git
+        # PROVISION_USERNAME is a runtime shell variable, so it is written without
+        # braces: Woodpecker pre-substitutes the braced form and would blank it.
+        git commit -m "feat(provision): auto-provision user $PROVISION_USERNAME [CI SKIP]" || echo "No changes to commit"
+        GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git pull --rebase origin master || true
+        GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git push origin master
+    when:
+      status: [success, failure]
+
+  - name: slack
+    image: curlimages/curl
+    commands:
+      - |
+        curl -s -X POST -H 'Content-type: application/json' \
+          --data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: User provisioning for ${CI_PIPELINE_VARIABLE_USERNAME:-unknown} ${CI_PIPELINE_STATUS}\"}" \
+          "$SLACK_WEBHOOK" || true
+    environment:
+      SLACK_WEBHOOK:
+        from_secret: slack_webhook
+    when:
+      status: [success, failure]
diff --git a/stacks/vault/main.tf b/stacks/vault/main.tf
index 77517420..8959be73 100644
--- a/stacks/vault/main.tf
+++ b/stacks/vault/main.tf
@@ -327,10 +327,24 @@ resource "vault_policy" "ci" {
     path "secret/metadata/*" {
       capabilities = ["list"]
     }
+    # Allow CI to write k8s_users during automated user provisioning
+    path "secret/data/platform" {
+      capabilities = ["create", "read", "update"]
+    }
     # Allow CI to get dynamic K8s deploy tokens for user namespaces
     path "kubernetes/creds/*-deployer" {
       capabilities = ["read"]
     }
+    # SOPS state encrypt/decrypt (per-stack Transit keys)
+    path "transit/encrypt/sops-state-*" {
+      capabilities = ["update"]
+    }
+    path "transit/decrypt/sops-state-*" {
+      capabilities = ["update"]
+    }
+    path "transit/keys/sops-state-*" {
+      capabilities = ["read"]
+    }
   EOT
 }
 
diff --git a/stacks/webhook_handler/main.tf b/stacks/webhook_handler/main.tf
index b47216e3..2011f9d6 100644
--- a/stacks/webhook_handler/main.tf
+++ b/stacks/webhook_handler/main.tf
@@ -187,6 +187,37 @@ resource "kubernetes_deployment" "webhook_handler" {
             name  = "SSH_KEY"
             value = "/opt/id_rsa"
           }
+          env {
+            name  = "WOODPECKER_API_URL"
+            value = "https://ci.viktorbarzin.me"
+          }
+          env {
+            name = "WOODPECKER_TOKEN"
+            value_from {
+              secret_key_ref {
+                name = "webhook-handler-secrets"
+                key  = "woodpecker_token"
+              }
+            }
+          }
+          env {
+            name = "WOODPECKER_INFRA_REPO_ID"
+            value_from {
+              secret_key_ref {
+                name = "webhook-handler-secrets"
+                key  = "woodpecker_infra_repo_id"
+              }
+            }
+          }
+          env {
+            name = "AUTHENTIK_WEBHOOK_SECRET"
+            value_from {
+              secret_key_ref {
+                name = "webhook-handler-secrets"
+                key  = "authentik_webhook_secret"
+              }
+            }
+          }
         }
         volume {
           name = "id-rsa"