feat: CI/CD performance overhaul

- New custom CI Docker image (ci/Dockerfile) with TF 1.5.7, TG 0.99.4,
  git-crypt, sops, kubectl pre-installed. Pushed to private registry.
  Eliminates 17 apk add calls + binary downloads per pipeline run.

- Unified CI pipeline: merge default.yml + app-stacks.yml into one.
  Changed-stacks-only detection (git diff, with global-file fallback).
  Concurrency limit (xargs -P 4). Step consolidation (2 steps vs 4).
  Shallow clone (depth=2). Provider cache (TF_PLUGIN_CACHE_DIR).

- Per-stack Vault advisory locks in scripts/tg. A lock older than 30 min
  is treated as stale and broken (client-side check, not a Vault TTL).
  Blocks concurrent applies to the same stack.

- TF_PLUGIN_CACHE_DIR enabled by default in scripts/tg for local dev.

- Daily drift detection pipeline (.woodpecker/drift-detection.yml).
  Runs terraform plan on all stacks, Slack alert on drift.

- CI image build pipeline (.woodpecker/build-ci-image.yml).

Expected speedup: ~5-10 min per pipeline run → ~2-4 min.

[ci skip]

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-04-15 11:22:26 +00:00
parent bcad200a23
commit 36454b87d1
6 changed files with 352 additions and 177 deletions

View file

@ -1,122 +0,0 @@
when:
event: push
branch: master
# Only trigger when application stack files change
path:
include:
- 'stacks/**'
exclude:
- '.woodpecker/**'
clone:
git:
image: woodpeckerci/plugin-git
settings:
attempts: 5
backoff: 10s
steps:
- name: detect-changes
image: alpine
commands:
- apk add --no-cache git
# Detect which stacks changed in the latest commit
- |
CHANGED=$(git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^stacks/' | cut -d/ -f2 | sort -u || true)
if [ -z "$CHANGED" ]; then
echo "No stack changes detected"
echo "" > .stacks_to_apply
exit 0
fi
# Exclude platform stacks (handled by default.yml)
PLATFORM="dbaas authentik crowdsec monitoring nvidia mailserver cloudflared kyverno metallb redis traefik technitium headscale rbac k8s-portal vaultwarden reverse-proxy metrics-server vpa nfs-csi iscsi-csi cnpg sealed-secrets uptime-kuma wireguard xray infra-maintenance platform vault reloader descheduler external-secrets"
APPLY=""
for stack in $CHANGED; do
if echo "$PLATFORM" | grep -qw "$stack"; then
echo "Skipping platform stack: $stack"
continue
fi
if [ ! -f "stacks/$stack/terragrunt.hcl" ]; then
echo "Skipping $stack (no terragrunt.hcl)"
continue
fi
APPLY="$APPLY $stack"
done
echo "$APPLY" > .stacks_to_apply
echo "Stacks to apply:$APPLY"
- name: prepare
image: alpine
commands:
- "apk update && apk add jq curl git git-crypt"
- |
curl -k https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" | jq -r .data.key | base64 -d > /tmp/key
- "git-crypt unlock /tmp/key && rm /tmp/key"
- |
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
VAULT_TOKEN=$(curl -s -X POST http://vault-active.vault.svc.cluster.local:8200/v1/auth/kubernetes/login \
-d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r .auth.client_token)
echo "export VAULT_TOKEN=$VAULT_TOKEN" > .vault-env
echo "export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200" >> .vault-env
when:
evaluate: 'CI_COMMIT_MESSAGE != "" && !contains(CI_COMMIT_MESSAGE, "[CI SKIP]")'
- name: terragrunt-apply
image: alpine
backend_options:
kubernetes:
resources:
requests:
memory: 2Gi
limits:
memory: 4Gi
commands:
- "apk update && apk add curl unzip git openssh-client"
- "wget -qO /tmp/terraform.zip https://releases.hashicorp.com/terraform/1.5.7/terraform_1.5.7_linux_amd64.zip"
- "unzip -o /tmp/terraform.zip -d /usr/local/bin/ && chmod 755 /usr/local/bin/terraform"
- "wget -qO /usr/local/bin/terragrunt https://github.com/gruntwork-io/terragrunt/releases/download/v0.99.4/terragrunt_linux_amd64"
- "chmod 755 /usr/local/bin/terragrunt"
- "source .vault-env"
- |
STACKS=$(cat .stacks_to_apply)
if [ -z "$STACKS" ]; then
echo "No app stacks to apply"
exit 0
fi
FAILED=""
for stack in $STACKS; do
echo "=== Applying: $stack ==="
(cd stacks/$stack && terragrunt apply --non-interactive -auto-approve) &
done
wait
when:
evaluate: 'CI_COMMIT_MESSAGE != "" && !contains(CI_COMMIT_MESSAGE, "[CI SKIP]")'
- name: cleanup-and-push
image: alpine
commands:
- "rm -f .vault-env"
- "apk update && apk add openssh-client git git-crypt"
- "mkdir -p ~/.ssh && ssh-keyscan -H github.com >> ~/.ssh/known_hosts"
- "chmod 400 secrets/deploy_key"
- "git add stacks/ state/ .woodpecker/ || true"
- "git remote set-url origin git@github.com:ViktorBarzin/infra.git"
- "git commit -m 'Woodpecker CI app-stacks deploy commit [CI SKIP]' || echo 'No changes'"
- "GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git pull --rebase origin master || true"
- "GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git push origin master"
when:
status: [success, failure]
- name: slack
image: curlimages/curl
commands:
- |
STACKS=$(cat .stacks_to_apply 2>/dev/null || echo "none")
curl -s -X POST -H 'Content-type: application/json' \
--data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: app-stacks pipeline ${CI_PIPELINE_STATUS} (stacks:${STACKS})\"}" \
"$SLACK_WEBHOOK" || true
environment:
SLACK_WEBHOOK:
from_secret: slack_webhook
when:
status: [success, failure]

View file

@ -0,0 +1,41 @@
# Builds and publishes the CI tools image that the infra pipelines run in.
# Runs on pushes to master that touch ci/Dockerfile, or on manual dispatch.
when:
  event:
    - push
    - manual
  branch: master
  path:
    include:
      - 'ci/Dockerfile'

steps:
  # Build ci/Dockerfile and push it under both a moving and a per-commit tag.
  - name: build-and-push
    image: woodpeckerci/plugin-docker-buildx
    settings:
      repo: registry.viktorbarzin.me:5050/infra-ci
      dockerfile: ci/Dockerfile
      context: ci/
      tags:
        - latest
        # First 8 characters of the commit SHA.
        - "${CI_COMMIT_SHA:0:8}"
      platforms: linux/amd64
      registry: registry.viktorbarzin.me:5050
      logins:
        - registry: registry.viktorbarzin.me:5050
          username:
            from_secret: registry_user
          password:
            from_secret: registry_password

  # Announce the freshly pushed tag; a failed webhook call never fails the step.
  - name: slack
    image: curlimages/curl
    commands:
      - |
        curl -s -X POST -H 'Content-type: application/json' \
        --data "{\"text\":\"CI image built: registry.viktorbarzin.me:5050/infra-ci:${CI_COMMIT_SHA:0:8}\"}" \
        "$SLACK_WEBHOOK" || true
    environment:
      SLACK_WEBHOOK:
        from_secret: slack_webhook
    when:
      status:
        - success

View file

@ -1,3 +1,15 @@
# Unified infra CI pipeline — detects changed stacks and applies only those.
# Platform stacks and app stacks handled in one pipeline with proper ordering.
#
# Optimizations over the previous split pipeline:
# - Custom CI image (no apk/wget per step)
# - Shallow clone (depth=2 for git diff HEAD~1)
# - TF_PLUGIN_CACHE_DIR (shared provider cache)
# - Concurrency limit (xargs -P 4)
# - Step consolidation (2 steps instead of 4)
# - Changed-stacks-only detection (skips no-op applies)
# - Global-file fallback (modules/config changes trigger full apply)
when:
event: push
branch: master
@ -6,28 +18,14 @@ clone:
git:
image: woodpeckerci/plugin-git
settings:
depth: 2
attempts: 5
backoff: 10s
steps:
- name: prepare
image: alpine
commands:
- "apk update && apk add jq curl git git-crypt"
# git-crypt for secrets/ directory (TLS certs, deploy key)
- |
curl -k https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" | jq -r .data.key | base64 -d > /tmp/key
- "git-crypt unlock /tmp/key && rm /tmp/key"
# Vault: authenticate via K8s service account JWT
- |
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
VAULT_TOKEN=$(curl -s -X POST http://vault-active.vault.svc.cluster.local:8200/v1/auth/kubernetes/login \
-d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r .auth.client_token)
echo "export VAULT_TOKEN=$VAULT_TOKEN" > .vault-env
echo "export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200" >> .vault-env
- name: terragrunt-apply
image: alpine
- name: apply
image: registry.viktorbarzin.me:5050/infra-ci:latest
pull: true
backend_options:
kubernetes:
resources:
@ -35,51 +33,142 @@ steps:
memory: 3Gi
limits:
memory: 6Gi
environment:
SLACK_WEBHOOK:
from_secret: slack_webhook
commands:
- "apk update && apk add curl unzip git openssh-client"
# Install Terraform
- "wget -qO /tmp/terraform.zip https://releases.hashicorp.com/terraform/1.5.7/terraform_1.5.7_linux_amd64.zip"
- "unzip -o /tmp/terraform.zip -d /usr/local/bin/ && chmod 755 /usr/local/bin/terraform"
# Install Terragrunt
- "wget -qO /usr/local/bin/terragrunt https://github.com/gruntwork-io/terragrunt/releases/download/v0.99.4/terragrunt_linux_amd64"
- "chmod 755 /usr/local/bin/terragrunt"
# Source Vault token
- "source .vault-env"
# Apply all platform stacks in parallel
# ── Skip CI commits ──
- |
for stack in dbaas authentik crowdsec monitoring nvidia mailserver cloudflared kyverno \
metallb redis traefik technitium headscale rbac k8s-portal vaultwarden \
reverse-proxy metrics-server vpa nfs-csi iscsi-csi cnpg sealed-secrets \
uptime-kuma wireguard xray infra-maintenance platform; do
(cd stacks/$stack && terragrunt apply --non-interactive -auto-approve) &
if echo "$CI_COMMIT_MESSAGE" | grep -q '\[CI SKIP\]\|\[ci skip\]'; then
echo "Commit has [CI SKIP], exiting"
exit 0
fi
# ── git-crypt unlock ──
- |
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
curl -sk "https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key" \
-H "Authorization:Bearer $SA_TOKEN" | jq -r .data.key | base64 -d > /tmp/key
git-crypt unlock /tmp/key && rm /tmp/key
# ── Vault auth ──
- |
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200
export VAULT_TOKEN=$(curl -s -X POST "$VAULT_ADDR/v1/auth/kubernetes/login" \
-d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r .auth.client_token)
# ── Detect changed stacks ──
- |
PLATFORM_STACKS="dbaas authentik crowdsec monitoring nvidia mailserver cloudflared kyverno metallb redis traefik technitium headscale rbac k8s-portal vaultwarden reverse-proxy metrics-server vpa nfs-csi iscsi-csi cnpg sealed-secrets uptime-kuma wireguard xray infra-maintenance platform vault reloader descheduler external-secrets"
# Check if global files changed (triggers full platform apply)
GLOBAL_CHANGED=$(git diff --name-only HEAD~1 HEAD 2>/dev/null | grep -E '^(modules/|config\.tfvars|terragrunt\.hcl)' || true)
if [ -n "$GLOBAL_CHANGED" ]; then
echo "Global files changed — applying ALL platform stacks"
echo "$PLATFORM_STACKS" | tr ' ' '\n' > .platform_apply
else
# Detect platform stacks that changed
git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^stacks/' | cut -d/ -f2 | sort -u > .all_changed
> .platform_apply
while read -r stack; do
if echo "$PLATFORM_STACKS" | grep -qw "$stack"; then
echo "$stack" >> .platform_apply
fi
done < .all_changed
fi
# Detect app stacks that changed
> .app_apply
git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^stacks/' | cut -d/ -f2 | sort -u | while read -r stack; do
if echo "$PLATFORM_STACKS" | grep -qw "$stack"; then
continue # Skip platform stacks
fi
if [ ! -f "stacks/$stack/terragrunt.hcl" ]; then
continue # Skip non-terragrunt dirs
fi
echo "$stack" >> .app_apply
done
wait
- name: cleanup-and-push
image: alpine
commands:
- "rm -f .vault-env"
- "apk update && apk add openssh-client git git-crypt"
- "mkdir -p ~/.ssh && ssh-keyscan -H github.com >> ~/.ssh/known_hosts"
- "chmod 400 secrets/deploy_key"
# Only add specific paths — never git add .
- "git add stacks/ state/ .woodpecker/ || true"
- "git remote set-url origin git@github.com:ViktorBarzin/infra.git"
- "git commit -m 'Woodpecker CI deploy commit [CI SKIP]' || echo 'No changes'"
- "GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git pull --rebase origin master || true"
- "GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git push origin master"
when:
status: [success, failure]
PLATFORM_COUNT=$(wc -l < .platform_apply | tr -d ' ')
APP_COUNT=$(wc -l < .app_apply | tr -d ' ')
echo "Platform stacks to apply: $PLATFORM_COUNT"
echo "App stacks to apply: $APP_COUNT"
cat .platform_apply .app_apply
- name: slack
# ── Pre-warm provider cache ──
- |
if [ -s .platform_apply ] || [ -s .app_apply ]; then
FIRST_STACK=$(head -1 .platform_apply .app_apply 2>/dev/null | head -1)
if [ -n "$FIRST_STACK" ]; then
echo "Pre-warming provider cache from stacks/$FIRST_STACK..."
cd "stacks/$FIRST_STACK" && terragrunt init --terragrunt-non-interactive -input=false 2>&1 | tail -3 && cd ../..
fi
fi
# ── Apply platform stacks (with concurrency limit) ──
- |
if [ -s .platform_apply ]; then
echo "=== Applying platform stacks (max 4 parallel) ==="
cat .platform_apply | xargs -P 4 -I{} sh -c '
echo "[{}] Starting apply..."
cd stacks/{} && terragrunt apply --non-interactive -auto-approve 2>&1 | tail -5
EXIT=$?
if [ $EXIT -ne 0 ]; then
echo "[{}] FAILED (exit $EXIT)"
else
echo "[{}] OK"
fi
'
fi
# ── Apply app stacks (with concurrency limit) ──
- |
if [ -s .app_apply ]; then
echo "=== Applying app stacks (max 4 parallel) ==="
cat .app_apply | xargs -P 4 -I{} sh -c '
echo "[{}] Starting apply..."
cd stacks/{} && terragrunt apply --non-interactive -auto-approve 2>&1 | tail -5
EXIT=$?
if [ $EXIT -ne 0 ]; then
echo "[{}] FAILED (exit $EXIT)"
else
echo "[{}] OK"
fi
'
fi
# ── Commit and push state changes ──
- |
mkdir -p ~/.ssh && ssh-keyscan -H github.com >> ~/.ssh/known_hosts 2>/dev/null
chmod 400 secrets/deploy_key
git add stacks/ state/ .woodpecker/ 2>/dev/null || true
git remote set-url origin git@github.com:ViktorBarzin/infra.git
git diff --cached --quiet && echo "No changes to commit" && exit 0
git commit -m "Woodpecker CI deploy [CI SKIP]"
GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git fetch origin master
GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git rebase origin/master || true
GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git push origin master
# ── Slack notification ──
- |
PLATFORM_COUNT=$(wc -l < .platform_apply 2>/dev/null | tr -d ' ')
APP_COUNT=$(wc -l < .app_apply 2>/dev/null | tr -d ' ')
curl -s -X POST -H 'Content-type: application/json' \
--data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: infra pipeline ${CI_PIPELINE_STATUS} (platform:${PLATFORM_COUNT}, apps:${APP_COUNT})\"}" \
"$SLACK_WEBHOOK" || true
# Slack on failure (runs even if apply step fails)
- name: notify-failure
image: curlimages/curl
commands:
- |
curl -s -X POST -H 'Content-type: application/json' \
--data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: infra pipeline ${CI_PIPELINE_STATUS}\"}" \
--data "{\"channel\":\"general\",\"text\":\":red_circle: Woodpecker CI: infra pipeline FAILED\"}" \
"$SLACK_WEBHOOK" || true
environment:
SLACK_WEBHOOK:
from_secret: slack_webhook
when:
status: [success, failure]
status: [failure]

View file

@ -0,0 +1,80 @@
# Daily drift detection — runs terraform plan on all stacks and alerts on drift.
# Triggered by Woodpecker cron schedule "drift-detection" (must be registered in Woodpecker UI/API).
when:
  event: cron
  cron: drift-detection

clone:
  git:
    image: woodpeckerci/plugin-git
    settings:
      depth: 1
      attempts: 3

steps:
  - name: detect-drift
    image: registry.viktorbarzin.me:5050/infra-ci:latest
    pull: true
    backend_options:
      kubernetes:
        resources:
          requests:
            memory: 2Gi
          limits:
            memory: 4Gi
    environment:
      SLACK_WEBHOOK:
        from_secret: slack_webhook
    commands:
      # ── git-crypt unlock ──
      # Fetch the git-crypt key from the woodpecker/git-crypt-key ConfigMap
      # using the pod's service-account token, unlock, then remove the key file.
      - |
        SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
        curl -sk "https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key" \
        -H "Authorization:Bearer $SA_TOKEN" | jq -r .data.key | base64 -d > /tmp/key
        git-crypt unlock /tmp/key && rm /tmp/key
      # ── Vault auth ──
      # Exchange the service-account JWT for a Vault token (role "ci").
      - |
        SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
        export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200
        export VAULT_TOKEN=$(curl -s -X POST "$VAULT_ADDR/v1/auth/kubernetes/login" \
        -d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r .auth.client_token)
      # ── Run terraform plan on all stacks ──
      - |
        DRIFTED=""
        CLEAN=0
        ERRORS=""
        for stack_dir in stacks/*/; do
          stack=$(basename "$stack_dir")
          [ -f "$stack_dir/terragrunt.hcl" ] || continue
          printf '[%s] planning... ' "$stack"
          # -detailed-exitcode: 0 = no changes, 2 = changes present, anything
          # else = error. Woodpecker runs commands under `set -e`, so the
          # non-zero status must be consumed with `||`; a bare assignment
          # would abort the whole step at the first drifted or failing stack.
          EXIT=0
          OUTPUT=$(cd "$stack_dir" && terragrunt plan -detailed-exitcode -input=false 2>&1) || EXIT=$?
          case $EXIT in
            0) echo "OK (no changes)"; CLEAN=$((CLEAN + 1)) ;;
            2) echo "DRIFT DETECTED"; DRIFTED="$DRIFTED $stack" ;;
            *)
              echo "ERROR (exit $EXIT)"
              # Show the end of the captured plan output so the failure can be diagnosed.
              echo "$OUTPUT" | tail -20
              ERRORS="$ERRORS $stack"
              ;;
          esac
        done
        echo ""
        echo "=== Drift Detection Summary ==="
        echo "Clean: $CLEAN stacks"
        echo "Drift: ${DRIFTED:-none}"
        echo "Errors: ${ERRORS:-none}"
        # ── Slack alert ──
        # Three outcomes: drift found, plan errors only, or everything clean.
        # A failed webhook call never fails the step.
        if [ -n "$DRIFTED" ]; then
          curl -s -X POST -H 'Content-type: application/json' \
          --data "{\"channel\":\"general\",\"text\":\":warning: Drift detected in:${DRIFTED}\nClean: ${CLEAN} stacks. Errors:${ERRORS:-none}\"}" \
          "$SLACK_WEBHOOK" || true
        elif [ -n "$ERRORS" ]; then
          curl -s -X POST -H 'Content-type: application/json' \
          --data "{\"channel\":\"general\",\"text\":\":x: Drift detection: ${CLEAN} stacks clean, plan errors in:${ERRORS}\"}" \
          "$SLACK_WEBHOOK" || true
        else
          curl -s -X POST -H 'Content-type: application/json' \
          --data "{\"channel\":\"general\",\"text\":\":white_check_mark: Drift detection: all ${CLEAN} stacks clean\"}" \
          "$SLACK_WEBHOOK" || true
        fi

42
ci/Dockerfile Normal file
View file

@ -0,0 +1,42 @@
# CI tools image for the infra pipelines: Terraform, Terragrunt, SOPS and
# kubectl, plus the shell utilities the pipeline scripts call.
FROM alpine:3.20

# Pin versions to match CI requirements
ARG TERRAFORM_VERSION=1.5.7
ARG TERRAGRUNT_VERSION=0.99.4
ARG SOPS_VERSION=3.9.4
ARG KUBECTL_VERSION=1.34.0

# Install system packages (single layer).
# --no-cache keeps no package index in the layer, so no manual cleanup is needed.
RUN apk add --no-cache \
    bash curl git git-crypt jq openssh-client openssl unzip

# Terraform (the version call fails the build if the binary cannot run)
RUN curl -fsSL "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_amd64.zip" \
    -o /tmp/terraform.zip \
    && unzip /tmp/terraform.zip -d /usr/local/bin/ \
    && rm /tmp/terraform.zip \
    && terraform version

# Terragrunt
RUN curl -fsSL "https://github.com/gruntwork-io/terragrunt/releases/download/v${TERRAGRUNT_VERSION}/terragrunt_linux_amd64" \
    -o /usr/local/bin/terragrunt \
    && chmod +x /usr/local/bin/terragrunt \
    && terragrunt --version

# SOPS (for state encryption); smoke-tested like the tools above
RUN curl -fsSL "https://github.com/getsops/sops/releases/download/v${SOPS_VERSION}/sops-v${SOPS_VERSION}.linux.amd64" \
    -o /usr/local/bin/sops \
    && chmod +x /usr/local/bin/sops \
    && sops --version

# kubectl; --client keeps the check offline (no cluster is contacted)
RUN curl -fsSL "https://dl.k8s.io/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl" \
    -o /usr/local/bin/kubectl \
    && chmod +x /usr/local/bin/kubectl \
    && kubectl version --client

# Provider cache directory (shared across stacks)
ENV TF_PLUGIN_CACHE_DIR=/tmp/terraform-plugin-cache
ENV TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE=1
RUN mkdir -p /tmp/terraform-plugin-cache

WORKDIR /workspace

View file

@ -1,23 +1,61 @@
#!/usr/bin/env bash
# scripts/tg — wrapper: decrypt state before, encrypt+commit after mutating ops
# Usage: scripts/tg apply --non-interactive
# scripts/tg run --all -- plan
# scripts/tg plan
# Auth: `vault login -method=oidc` (token at ~/.vault-token)
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
SYNC="$REPO_ROOT/scripts/state-sync"
# Enable provider cache (shared across stacks)
export TF_PLUGIN_CACHE_DIR="${TF_PLUGIN_CACHE_DIR:-$HOME/.terraform.d/plugin-cache}"
export TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE=1
mkdir -p "$TF_PLUGIN_CACHE_DIR"
# Determine stack name from cwd (relative to stacks/).
# STACK_NAME stays empty when the script is run from outside stacks/.
STACK_NAME=""
cwd="$(pwd)"
stacks_dir="$REPO_ROOT/stacks"
if [[ "$cwd" == "$stacks_dir"/* ]]; then
  # Strip the "<stacks_dir>/" prefix. The inner quotes make the prefix match
  # literally even when the repo path contains glob characters ([ ] * ?).
  rel="${cwd#"$stacks_dir"/}"
  # Keep only the first path component: the stack directory name.
  STACK_NAME="${rel%%/*}"
fi
# ── Advisory lock via Vault KV ──
LOCK_MAX_AGE=1800 # 30 minutes — stale lock threshold
# Acquire the advisory lock for a stack, stored in Vault KV at secret/locks/<stack>.
#
# Arguments:
#   $1 - stack name
# Returns:
#   0 once the lock record (locked, holder, acquired) has been written;
#   1, with a message on stderr, when a lock younger than LOCK_MAX_AGE
#   seconds already exists.
# A lock at least LOCK_MAX_AGE seconds old is considered stale and overwritten.
# NOTE(review): the read and the write below are separate Vault calls, so two
# callers that both read "unlocked" at the same moment could both proceed —
# confirm whether that window matters for this workflow.
acquire_lock() {
  local stack="$1"
  local holder existing locked acquired existing_holder now age

  # Identify this caller in the lock record; a failing hostname/whoami
  # (e.g. a UID without a passwd entry) just leaves that field empty.
  holder="pid=$$,host=$(hostname -s || true),user=$(whoami || true)"

  # A missing lock, or a failed read, is treated as "not locked".
  existing=$(vault kv get -format=json "secret/locks/$stack" 2>/dev/null || echo '{}')
  locked=$(jq -r '.data.data.locked // "false"' <<<"$existing") || locked="false"
  acquired=$(jq -r '.data.data.acquired // "0"' <<<"$existing") || acquired="0"
  existing_holder=$(jq -r '.data.data.holder // ""' <<<"$existing") || existing_holder=""

  if [ "$locked" = "true" ]; then
    # A malformed timestamp would abort the arithmetic below under `set -u`;
    # treat it as epoch 0 so the lock is handled as stale.
    [[ "$acquired" =~ ^[0-9]+$ ]] || acquired=0
    now=$(date +%s)
    age=$((now - acquired))
    if [ "$age" -lt "$LOCK_MAX_AGE" ]; then
      echo "ERROR: Stack '$stack' is locked by: $existing_holder (${age}s ago)" >&2
      echo " Wait for it to finish or run: vault kv delete secret/locks/$stack" >&2
      return 1
    fi
    echo "WARNING: Breaking stale lock on '$stack' (held ${age}s by $existing_holder)" >&2
  fi

  vault kv put "secret/locks/$stack" locked=true holder="$holder" acquired="$(date +%s)" >/dev/null
}
# Release the advisory lock for a stack by deleting its Vault KV entry.
# Best-effort: all output is discarded and a failed delete is ignored.
#   $1 - stack name
release_lock() {
  local stack_name="$1"
  vault kv delete "secret/locks/${stack_name}" &>/dev/null || :
}
# Decrypt state before any operation
if [ -n "$STACK_NAME" ] && [ -f "$REPO_ROOT/state/stacks/$STACK_NAME/terraform.tfstate.enc" ]; then
"$SYNC" decrypt "$STACK_NAME"
@ -31,6 +69,14 @@ for arg in "$@"; do
esac
done
# Acquire lock for mutating operations
if $is_mutating && [ -n "$STACK_NAME" ]; then
if command -v vault &>/dev/null && [ -n "${VAULT_TOKEN:-}" ]; then
acquire_lock "$STACK_NAME"
trap 'release_lock "$STACK_NAME"' EXIT
fi
fi
# If running apply with --non-interactive, add -auto-approve for Terraform
args=("$@")
has_apply=false
@ -43,7 +89,6 @@ for arg in "${args[@]}"; do
done
if $has_apply && $has_non_interactive; then
# Rebuild args: insert -auto-approve after apply
new_args=()
for arg in "${args[@]}"; do
new_args+=("$arg")