feat: CI/CD performance overhaul
- New custom CI Docker image (ci/Dockerfile) with TF 1.5.7, TG 0.99.4, git-crypt, sops, kubectl pre-installed. Pushed to private registry. Eliminates 17 apk add calls + binary downloads per pipeline run. - Unified CI pipeline: merge default.yml + app-stacks.yml into one. Changed-stacks-only detection (git diff, with global-file fallback). Concurrency limit (xargs -P 4). Step consolidation (2 steps vs 4). Shallow clone (depth=2). Provider cache (TF_PLUGIN_CACHE_DIR). - Per-stack Vault advisory locks in scripts/tg. 30min TTL with stale lock detection. Blocks concurrent applies to same stack. - TF_PLUGIN_CACHE_DIR enabled by default in scripts/tg for local dev. - Daily drift detection pipeline (.woodpecker/drift-detection.yml). Runs terraform plan on all stacks, Slack alert on drift. - CI image build pipeline (.woodpecker/build-ci-image.yml). Expected speedup: ~5-10 min per pipeline run → ~2-4 min. [ci skip] Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
bcad200a23
commit
36454b87d1
6 changed files with 352 additions and 177 deletions
|
|
@ -1,122 +0,0 @@
|
|||
when:
|
||||
event: push
|
||||
branch: master
|
||||
# Only trigger when application stack files change
|
||||
path:
|
||||
include:
|
||||
- 'stacks/**'
|
||||
exclude:
|
||||
- '.woodpecker/**'
|
||||
|
||||
clone:
|
||||
git:
|
||||
image: woodpeckerci/plugin-git
|
||||
settings:
|
||||
attempts: 5
|
||||
backoff: 10s
|
||||
|
||||
steps:
|
||||
- name: detect-changes
|
||||
image: alpine
|
||||
commands:
|
||||
- apk add --no-cache git
|
||||
# Detect which stacks changed in the latest commit
|
||||
- |
|
||||
CHANGED=$(git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^stacks/' | cut -d/ -f2 | sort -u || true)
|
||||
if [ -z "$CHANGED" ]; then
|
||||
echo "No stack changes detected"
|
||||
echo "" > .stacks_to_apply
|
||||
exit 0
|
||||
fi
|
||||
# Exclude platform stacks (handled by default.yml)
|
||||
PLATFORM="dbaas authentik crowdsec monitoring nvidia mailserver cloudflared kyverno metallb redis traefik technitium headscale rbac k8s-portal vaultwarden reverse-proxy metrics-server vpa nfs-csi iscsi-csi cnpg sealed-secrets uptime-kuma wireguard xray infra-maintenance platform vault reloader descheduler external-secrets"
|
||||
APPLY=""
|
||||
for stack in $CHANGED; do
|
||||
if echo "$PLATFORM" | grep -qw "$stack"; then
|
||||
echo "Skipping platform stack: $stack"
|
||||
continue
|
||||
fi
|
||||
if [ ! -f "stacks/$stack/terragrunt.hcl" ]; then
|
||||
echo "Skipping $stack (no terragrunt.hcl)"
|
||||
continue
|
||||
fi
|
||||
APPLY="$APPLY $stack"
|
||||
done
|
||||
echo "$APPLY" > .stacks_to_apply
|
||||
echo "Stacks to apply:$APPLY"
|
||||
|
||||
- name: prepare
|
||||
image: alpine
|
||||
commands:
|
||||
- "apk update && apk add jq curl git git-crypt"
|
||||
- |
|
||||
curl -k https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" | jq -r .data.key | base64 -d > /tmp/key
|
||||
- "git-crypt unlock /tmp/key && rm /tmp/key"
|
||||
- |
|
||||
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
|
||||
VAULT_TOKEN=$(curl -s -X POST http://vault-active.vault.svc.cluster.local:8200/v1/auth/kubernetes/login \
|
||||
-d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r .auth.client_token)
|
||||
echo "export VAULT_TOKEN=$VAULT_TOKEN" > .vault-env
|
||||
echo "export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200" >> .vault-env
|
||||
when:
|
||||
evaluate: 'CI_COMMIT_MESSAGE != "" && !contains(CI_COMMIT_MESSAGE, "[CI SKIP]")'
|
||||
|
||||
- name: terragrunt-apply
|
||||
image: alpine
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
requests:
|
||||
memory: 2Gi
|
||||
limits:
|
||||
memory: 4Gi
|
||||
commands:
|
||||
- "apk update && apk add curl unzip git openssh-client"
|
||||
- "wget -qO /tmp/terraform.zip https://releases.hashicorp.com/terraform/1.5.7/terraform_1.5.7_linux_amd64.zip"
|
||||
- "unzip -o /tmp/terraform.zip -d /usr/local/bin/ && chmod 755 /usr/local/bin/terraform"
|
||||
- "wget -qO /usr/local/bin/terragrunt https://github.com/gruntwork-io/terragrunt/releases/download/v0.99.4/terragrunt_linux_amd64"
|
||||
- "chmod 755 /usr/local/bin/terragrunt"
|
||||
- "source .vault-env"
|
||||
- |
|
||||
STACKS=$(cat .stacks_to_apply)
|
||||
if [ -z "$STACKS" ]; then
|
||||
echo "No app stacks to apply"
|
||||
exit 0
|
||||
fi
|
||||
FAILED=""
|
||||
for stack in $STACKS; do
|
||||
echo "=== Applying: $stack ==="
|
||||
(cd stacks/$stack && terragrunt apply --non-interactive -auto-approve) &
|
||||
done
|
||||
wait
|
||||
when:
|
||||
evaluate: 'CI_COMMIT_MESSAGE != "" && !contains(CI_COMMIT_MESSAGE, "[CI SKIP]")'
|
||||
|
||||
- name: cleanup-and-push
|
||||
image: alpine
|
||||
commands:
|
||||
- "rm -f .vault-env"
|
||||
- "apk update && apk add openssh-client git git-crypt"
|
||||
- "mkdir -p ~/.ssh && ssh-keyscan -H github.com >> ~/.ssh/known_hosts"
|
||||
- "chmod 400 secrets/deploy_key"
|
||||
- "git add stacks/ state/ .woodpecker/ || true"
|
||||
- "git remote set-url origin git@github.com:ViktorBarzin/infra.git"
|
||||
- "git commit -m 'Woodpecker CI app-stacks deploy commit [CI SKIP]' || echo 'No changes'"
|
||||
- "GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git pull --rebase origin master || true"
|
||||
- "GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git push origin master"
|
||||
when:
|
||||
status: [success, failure]
|
||||
|
||||
- name: slack
|
||||
image: curlimages/curl
|
||||
commands:
|
||||
- |
|
||||
STACKS=$(cat .stacks_to_apply 2>/dev/null || echo "none")
|
||||
curl -s -X POST -H 'Content-type: application/json' \
|
||||
--data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: app-stacks pipeline ${CI_PIPELINE_STATUS} (stacks:${STACKS})\"}" \
|
||||
"$SLACK_WEBHOOK" || true
|
||||
environment:
|
||||
SLACK_WEBHOOK:
|
||||
from_secret: slack_webhook
|
||||
when:
|
||||
status: [success, failure]
|
||||
41
.woodpecker/build-ci-image.yml
Normal file
41
.woodpecker/build-ci-image.yml
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# Builds and publishes the CI tools image that the infra pipelines run in.
# Runs on pushes to master that touch ci/Dockerfile, or on manual dispatch.

when:
  event:
    - push
    - manual
  branch: master
  path:
    include:
      - ci/Dockerfile

steps:
  # Build ci/Dockerfile for linux/amd64 and push it under two tags:
  # `latest` and the first 8 characters of the commit SHA.
  - name: build-and-push
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: registry.viktorbarzin.me:5050
      repo: registry.viktorbarzin.me:5050/infra-ci
      context: ci/
      dockerfile: ci/Dockerfile
      platforms: linux/amd64
      tags:
        - latest
        - "${CI_COMMIT_SHA:0:8}"
      logins:
        - registry: registry.viktorbarzin.me:5050
          username:
            from_secret: registry_user
          password:
            from_secret: registry_password

  # Announce the freshly pushed tag; only runs when the build succeeded.
  # A failed webhook call is tolerated (`|| true`).
  - name: slack
    image: curlimages/curl
    environment:
      SLACK_WEBHOOK:
        from_secret: slack_webhook
    when:
      status:
        - success
    commands:
      - |
        curl -s -X POST -H 'Content-type: application/json' \
          --data "{\"text\":\"CI image built: registry.viktorbarzin.me:5050/infra-ci:${CI_COMMIT_SHA:0:8}\"}" \
          "$SLACK_WEBHOOK" || true
|
||||
|
|
@ -1,3 +1,15 @@
|
|||
# Unified infra CI pipeline — detects changed stacks and applies only those.
|
||||
# Platform stacks and app stacks handled in one pipeline with proper ordering.
|
||||
#
|
||||
# Optimizations over the previous split pipeline:
|
||||
# - Custom CI image (no apk/wget per step)
|
||||
# - Shallow clone (depth=2 for git diff HEAD~1)
|
||||
# - TF_PLUGIN_CACHE_DIR (shared provider cache)
|
||||
# - Concurrency limit (xargs -P 4)
|
||||
# - Step consolidation (2 steps instead of 4)
|
||||
# - Changed-stacks-only detection (skips no-op applies)
|
||||
# - Global-file fallback (modules/config changes trigger full apply)
|
||||
|
||||
when:
|
||||
event: push
|
||||
branch: master
|
||||
|
|
@ -6,28 +18,14 @@ clone:
|
|||
git:
|
||||
image: woodpeckerci/plugin-git
|
||||
settings:
|
||||
depth: 2
|
||||
attempts: 5
|
||||
backoff: 10s
|
||||
|
||||
steps:
|
||||
- name: prepare
|
||||
image: alpine
|
||||
commands:
|
||||
- "apk update && apk add jq curl git git-crypt"
|
||||
# git-crypt for secrets/ directory (TLS certs, deploy key)
|
||||
- |
|
||||
curl -k https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key -H "Authorization:Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" | jq -r .data.key | base64 -d > /tmp/key
|
||||
- "git-crypt unlock /tmp/key && rm /tmp/key"
|
||||
# Vault: authenticate via K8s service account JWT
|
||||
- |
|
||||
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
|
||||
VAULT_TOKEN=$(curl -s -X POST http://vault-active.vault.svc.cluster.local:8200/v1/auth/kubernetes/login \
|
||||
-d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r .auth.client_token)
|
||||
echo "export VAULT_TOKEN=$VAULT_TOKEN" > .vault-env
|
||||
echo "export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200" >> .vault-env
|
||||
|
||||
- name: terragrunt-apply
|
||||
image: alpine
|
||||
- name: apply
|
||||
image: registry.viktorbarzin.me:5050/infra-ci:latest
|
||||
pull: true
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
|
|
@ -35,51 +33,142 @@ steps:
|
|||
memory: 3Gi
|
||||
limits:
|
||||
memory: 6Gi
|
||||
environment:
|
||||
SLACK_WEBHOOK:
|
||||
from_secret: slack_webhook
|
||||
commands:
|
||||
- "apk update && apk add curl unzip git openssh-client"
|
||||
# Install Terraform
|
||||
- "wget -qO /tmp/terraform.zip https://releases.hashicorp.com/terraform/1.5.7/terraform_1.5.7_linux_amd64.zip"
|
||||
- "unzip -o /tmp/terraform.zip -d /usr/local/bin/ && chmod 755 /usr/local/bin/terraform"
|
||||
# Install Terragrunt
|
||||
- "wget -qO /usr/local/bin/terragrunt https://github.com/gruntwork-io/terragrunt/releases/download/v0.99.4/terragrunt_linux_amd64"
|
||||
- "chmod 755 /usr/local/bin/terragrunt"
|
||||
# Source Vault token
|
||||
- "source .vault-env"
|
||||
# Apply all platform stacks in parallel
|
||||
# ── Skip CI commits ──
|
||||
- |
|
||||
for stack in dbaas authentik crowdsec monitoring nvidia mailserver cloudflared kyverno \
|
||||
metallb redis traefik technitium headscale rbac k8s-portal vaultwarden \
|
||||
reverse-proxy metrics-server vpa nfs-csi iscsi-csi cnpg sealed-secrets \
|
||||
uptime-kuma wireguard xray infra-maintenance platform; do
|
||||
(cd stacks/$stack && terragrunt apply --non-interactive -auto-approve) &
|
||||
if echo "$CI_COMMIT_MESSAGE" | grep -q '\[CI SKIP\]\|\[ci skip\]'; then
|
||||
echo "Commit has [CI SKIP], exiting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# ── git-crypt unlock ──
|
||||
- |
|
||||
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
|
||||
curl -sk "https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key" \
|
||||
-H "Authorization:Bearer $SA_TOKEN" | jq -r .data.key | base64 -d > /tmp/key
|
||||
git-crypt unlock /tmp/key && rm /tmp/key
|
||||
|
||||
# ── Vault auth ──
|
||||
- |
|
||||
SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
|
||||
export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200
|
||||
export VAULT_TOKEN=$(curl -s -X POST "$VAULT_ADDR/v1/auth/kubernetes/login" \
|
||||
-d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r .auth.client_token)
|
||||
|
||||
# ── Detect changed stacks ──
|
||||
- |
|
||||
PLATFORM_STACKS="dbaas authentik crowdsec monitoring nvidia mailserver cloudflared kyverno metallb redis traefik technitium headscale rbac k8s-portal vaultwarden reverse-proxy metrics-server vpa nfs-csi iscsi-csi cnpg sealed-secrets uptime-kuma wireguard xray infra-maintenance platform vault reloader descheduler external-secrets"
|
||||
|
||||
# Check if global files changed (triggers full platform apply)
|
||||
GLOBAL_CHANGED=$(git diff --name-only HEAD~1 HEAD 2>/dev/null | grep -E '^(modules/|config\.tfvars|terragrunt\.hcl)' || true)
|
||||
|
||||
if [ -n "$GLOBAL_CHANGED" ]; then
|
||||
echo "Global files changed — applying ALL platform stacks"
|
||||
echo "$PLATFORM_STACKS" | tr ' ' '\n' > .platform_apply
|
||||
else
|
||||
# Detect platform stacks that changed
|
||||
git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^stacks/' | cut -d/ -f2 | sort -u > .all_changed
|
||||
> .platform_apply
|
||||
while read -r stack; do
|
||||
if echo "$PLATFORM_STACKS" | grep -qw "$stack"; then
|
||||
echo "$stack" >> .platform_apply
|
||||
fi
|
||||
done < .all_changed
|
||||
fi
|
||||
|
||||
# Detect app stacks that changed
|
||||
> .app_apply
|
||||
git diff --name-only HEAD~1 HEAD 2>/dev/null | grep '^stacks/' | cut -d/ -f2 | sort -u | while read -r stack; do
|
||||
if echo "$PLATFORM_STACKS" | grep -qw "$stack"; then
|
||||
continue # Skip platform stacks
|
||||
fi
|
||||
if [ ! -f "stacks/$stack/terragrunt.hcl" ]; then
|
||||
continue # Skip non-terragrunt dirs
|
||||
fi
|
||||
echo "$stack" >> .app_apply
|
||||
done
|
||||
wait
|
||||
|
||||
- name: cleanup-and-push
|
||||
image: alpine
|
||||
commands:
|
||||
- "rm -f .vault-env"
|
||||
- "apk update && apk add openssh-client git git-crypt"
|
||||
- "mkdir -p ~/.ssh && ssh-keyscan -H github.com >> ~/.ssh/known_hosts"
|
||||
- "chmod 400 secrets/deploy_key"
|
||||
# Only add specific paths — never git add .
|
||||
- "git add stacks/ state/ .woodpecker/ || true"
|
||||
- "git remote set-url origin git@github.com:ViktorBarzin/infra.git"
|
||||
- "git commit -m 'Woodpecker CI deploy commit [CI SKIP]' || echo 'No changes'"
|
||||
- "GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git pull --rebase origin master || true"
|
||||
- "GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git push origin master"
|
||||
when:
|
||||
status: [success, failure]
|
||||
PLATFORM_COUNT=$(wc -l < .platform_apply | tr -d ' ')
|
||||
APP_COUNT=$(wc -l < .app_apply | tr -d ' ')
|
||||
echo "Platform stacks to apply: $PLATFORM_COUNT"
|
||||
echo "App stacks to apply: $APP_COUNT"
|
||||
cat .platform_apply .app_apply
|
||||
|
||||
- name: slack
|
||||
# ── Pre-warm provider cache ──
# Runs one `terragrunt init` in the first stack scheduled for apply, before
# the parallel applies start. Best-effort: a failed init is not fatal here,
# the real apply will surface it.
- |
  # `cat | head` instead of `head -1 fileA fileB`: with several file
  # arguments head prints a "==> file <==" header first, which would end up
  # in FIRST_STACK instead of a stack name.
  FIRST_STACK=$(cat .platform_apply .app_apply 2>/dev/null | head -n 1)
  if [ -n "$FIRST_STACK" ]; then
    echo "Pre-warming provider cache from stacks/$FIRST_STACK..."
    # Subshell keeps the working directory of the rest of the step untouched,
    # even when the cd or the init fails. Uses the same --non-interactive
    # flag as the apply commands.
    (cd "stacks/$FIRST_STACK" && terragrunt init --non-interactive -input=false 2>&1 | tail -3) || true
  fi
|
||||
|
||||
# ── Apply platform stacks, then app stacks (max 4 parallel each) ──
# Failed stacks are recorded in .apply_failed rather than aborting right
# away, so that state from the stacks that did apply is still committed and
# the Slack summary is still sent. The last command below fails the step if
# anything was recorded.
#
# NOTE: shell variables in this section are written as $VAR, not ${VAR}:
# Woodpecker pre-processes ${...} expressions in the YAML before the shell
# sees them (escape with $${...} if braces are ever needed).
- |
  : > .apply_failed

  # Script executed once per stack by xargs; the stack name arrives as $1
  # (passed as an argument, not spliced into the script text).
  # The apply output goes to a temp file so that the exit status captured is
  # terragrunt's own; piping straight into `tail` would report tail's status.
  APPLY_ONE='
    stack=$1
    echo "[$stack] Starting apply..."
    log=$(mktemp)
    status=0
    (cd "stacks/$stack" && terragrunt apply --non-interactive -auto-approve) >"$log" 2>&1 || status=$?
    tail -5 "$log"
    rm -f "$log"
    if [ "$status" -ne 0 ]; then
      echo "[$stack] FAILED (exit $status)"
      echo "$stack" >> .apply_failed
    else
      echo "[$stack] OK"
    fi
  '

  if [ -s .platform_apply ]; then
    echo "=== Applying platform stacks (max 4 parallel) ==="
    xargs -P 4 -n 1 sh -c "$APPLY_ONE" sh < .platform_apply
  fi

  if [ -s .app_apply ]; then
    echo "=== Applying app stacks (max 4 parallel) ==="
    xargs -P 4 -n 1 sh -c "$APPLY_ONE" sh < .app_apply
  fi

# ── Commit and push state changes ──
- |
  mkdir -p ~/.ssh && ssh-keyscan -H github.com >> ~/.ssh/known_hosts 2>/dev/null
  chmod 400 secrets/deploy_key
  # Only specific paths are staged (.apply_failed and the *_apply lists at
  # the repo root are therefore never committed).
  git add stacks/ state/ .woodpecker/ 2>/dev/null || true
  git remote set-url origin git@github.com:ViktorBarzin/infra.git
  # No `exit 0` on the nothing-to-commit path: exiting here would also skip
  # the Slack notification and the failure check that follow.
  if git diff --cached --quiet; then
    echo "No changes to commit"
  else
    git commit -m "Woodpecker CI deploy [CI SKIP]"
    GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git fetch origin master
    GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git rebase origin/master || true
    GIT_SSH_COMMAND='ssh -i ./secrets/deploy_key -o IdentitiesOnly=yes' git push origin master
  fi

# ── Slack notification ──
- |
  PLATFORM_COUNT=$(wc -l < .platform_apply 2>/dev/null | tr -d ' ')
  APP_COUNT=$(wc -l < .app_apply 2>/dev/null | tr -d ' ')
  FAILED_NOTE=""
  if [ -s .apply_failed ]; then
    FAILED_NOTE=", failed: $(tr '\n' ' ' < .apply_failed)"
  fi
  # ${CI_PIPELINE_STATUS} is a Woodpecker built-in and is left in brace form
  # on purpose; the counts are shell variables and must not use braces.
  curl -s -X POST -H 'Content-type: application/json' \
    --data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: infra pipeline ${CI_PIPELINE_STATUS} (platform:$PLATFORM_COUNT, apps:$APP_COUNT$FAILED_NOTE)\"}" \
    "$SLACK_WEBHOOK" || true

# ── Fail the step if any stack failed to apply ──
- |
  if [ -s .apply_failed ]; then
    echo "The following stacks failed to apply:"
    cat .apply_failed
    exit 1
  fi
|
||||
|
||||
# Slack on failure (runs even if apply step fails)
|
||||
- name: notify-failure
|
||||
image: curlimages/curl
|
||||
commands:
|
||||
- |
|
||||
curl -s -X POST -H 'Content-type: application/json' \
|
||||
--data "{\"channel\":\"general\",\"text\":\"Woodpecker CI: infra pipeline ${CI_PIPELINE_STATUS}\"}" \
|
||||
--data "{\"channel\":\"general\",\"text\":\":red_circle: Woodpecker CI: infra pipeline FAILED\"}" \
|
||||
"$SLACK_WEBHOOK" || true
|
||||
environment:
|
||||
SLACK_WEBHOOK:
|
||||
from_secret: slack_webhook
|
||||
when:
|
||||
status: [success, failure]
|
||||
status: [failure]
|
||||
|
|
|
|||
80
.woodpecker/drift-detection.yml
Normal file
80
.woodpecker/drift-detection.yml
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
# Daily drift detection — runs terraform plan on all stacks and alerts on drift.
# Triggered by Woodpecker cron schedule "drift-detection" (must be registered in Woodpecker UI/API).
#
# NOTE: shell variables in `commands` are written as $VAR, not ${VAR}.
# Woodpecker pre-processes ${...} expressions in the YAML before the shell
# runs, so ${DRIFTED}, ${ERRORS:-none} etc. would be replaced at compile time
# (escape with $${...} if braces are ever needed).

when:
  event: cron
  cron: drift-detection

clone:
  git:
    image: woodpeckerci/plugin-git
    settings:
      depth: 1
      attempts: 3

steps:
  - name: detect-drift
    image: registry.viktorbarzin.me:5050/infra-ci:latest
    pull: true
    backend_options:
      kubernetes:
        resources:
          requests:
            memory: 2Gi
          limits:
            memory: 4Gi
    environment:
      SLACK_WEBHOOK:
        from_secret: slack_webhook
    commands:
      # ── git-crypt unlock ──
      # The key is read from a ConfigMap via the pod's service-account token.
      - |
        SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
        curl -sk "https://10.0.20.100:6443/api/v1/namespaces/woodpecker/configmaps/git-crypt-key" \
          -H "Authorization:Bearer $SA_TOKEN" | jq -r .data.key | base64 -d > /tmp/key
        # Explicit check: in `unlock && rm` a failed unlock is not caught by
        # `sh -e` and would leave the key file behind.
        if ! git-crypt unlock /tmp/key; then
          rm -f /tmp/key
          echo "ERROR: git-crypt unlock failed" >&2
          exit 1
        fi
        rm -f /tmp/key

      # ── Vault auth ──
      # Kubernetes auth: exchange the service-account JWT for a Vault token.
      - |
        SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
        export VAULT_ADDR=http://vault-active.vault.svc.cluster.local:8200
        VAULT_TOKEN=$(curl -s -X POST "$VAULT_ADDR/v1/auth/kubernetes/login" \
          -d "{\"role\":\"ci\",\"jwt\":\"$SA_TOKEN\"}" | jq -r '.auth.client_token // empty')
        # Without this check a failed login would export the literal token
        # "null" and every plan below would be reported as an error.
        if [ -z "$VAULT_TOKEN" ]; then
          echo "ERROR: Vault login failed (no client token returned)" >&2
          exit 1
        fi
        export VAULT_TOKEN

      # ── Run terraform plan on all stacks ──
      - |
        DRIFTED=""
        CLEAN=0
        ERRORS=""

        for stack_dir in stacks/*/; do
          stack=$(basename "$stack_dir")
          [ -f "$stack_dir/terragrunt.hcl" ] || continue

          printf '[%s] planning... ' "$stack"
          # -detailed-exitcode: 0 = no changes, 1 = error, 2 = changes.
          # `|| EXIT=$?` is required: the step runs under `sh -e`, and a bare
          # assignment from a command substitution that exits 2 would abort
          # the whole step at the first drifted stack.
          EXIT=0
          OUTPUT=$(cd "$stack_dir" && terragrunt plan -detailed-exitcode -input=false 2>&1) || EXIT=$?

          case $EXIT in
            0) echo "OK (no changes)"; CLEAN=$((CLEAN + 1)) ;;
            2) echo "DRIFT DETECTED"; DRIFTED="$DRIFTED $stack" ;;
            *)
              # Any other status (1 or e.g. a signal) counts as an error;
              # show the tail of the plan output to make it diagnosable.
              echo "ERROR (exit $EXIT)"
              printf '%s\n' "$OUTPUT" | tail -20
              ERRORS="$ERRORS $stack"
              ;;
          esac
        done

        DRIFT_TEXT="$DRIFTED"
        [ -n "$DRIFT_TEXT" ] || DRIFT_TEXT=" none"
        ERROR_TEXT="$ERRORS"
        [ -n "$ERROR_TEXT" ] || ERROR_TEXT=" none"

        echo ""
        echo "=== Drift Detection Summary ==="
        echo "Clean: $CLEAN stacks"
        echo "Drift:$DRIFT_TEXT"
        echo "Errors:$ERROR_TEXT"

        # ── Slack alert ──
        # The \n stays a literal backslash-n in the shell string and becomes
        # a newline when Slack parses the JSON.
        if [ -n "$DRIFTED" ]; then
          TEXT=":warning: Drift detected in:$DRIFTED\nClean: $CLEAN stacks. Errors:$ERROR_TEXT"
        elif [ -n "$ERRORS" ]; then
          # Errors without drift must not be reported as "all clean".
          TEXT=":x: Drift detection: plan failed for:$ERRORS\nClean: $CLEAN stacks."
        else
          TEXT=":white_check_mark: Drift detection: all $CLEAN stacks clean"
        fi
        curl -s -X POST -H 'Content-type: application/json' \
          --data "{\"channel\":\"general\",\"text\":\"$TEXT\"}" \
          "$SLACK_WEBHOOK" || true
|
||||
42
ci/Dockerfile
Normal file
42
ci/Dockerfile
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# CI tools image for the infra pipelines: Terraform, Terragrunt, SOPS,
# kubectl plus the system packages the pipeline commands call.
FROM alpine:3.20

# Pin versions to match CI requirements
ARG TERRAFORM_VERSION=1.5.7
ARG TERRAGRUNT_VERSION=0.99.4
ARG SOPS_VERSION=3.9.4
ARG KUBECTL_VERSION=1.34.0

# Install system packages (single layer).
# --no-cache already avoids leaving an apk index behind, so no extra cleanup.
RUN apk add --no-cache \
      bash curl git git-crypt jq openssh-client openssl unzip

# Terraform — verified against the SHA256SUMS file published with the release.
# The checksum line is written to a file first so that a missing entry makes
# grep (and therefore the build) fail instead of checking nothing.
RUN cd /tmp \
    && curl -fsSLO "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_amd64.zip" \
    && curl -fsSLO "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_SHA256SUMS" \
    && grep "terraform_${TERRAFORM_VERSION}_linux_amd64.zip" "terraform_${TERRAFORM_VERSION}_SHA256SUMS" > terraform.sha256 \
    && sha256sum -c terraform.sha256 \
    && unzip "terraform_${TERRAFORM_VERSION}_linux_amd64.zip" -d /usr/local/bin/ \
    && rm -f "terraform_${TERRAFORM_VERSION}_linux_amd64.zip" "terraform_${TERRAFORM_VERSION}_SHA256SUMS" terraform.sha256 \
    && terraform version

# Terragrunt
# TODO(review): not checksum-verified; add verification against the release's
# published checksums once the asset name has been confirmed.
RUN curl -fsSL "https://github.com/gruntwork-io/terragrunt/releases/download/v${TERRAGRUNT_VERSION}/terragrunt_linux_amd64" \
      -o /usr/local/bin/terragrunt \
    && chmod +x /usr/local/bin/terragrunt \
    && terragrunt --version

# SOPS (for state encryption)
# TODO(review): not checksum-verified; see the Terragrunt note above.
RUN curl -fsSL "https://github.com/getsops/sops/releases/download/v${SOPS_VERSION}/sops-v${SOPS_VERSION}.linux.amd64" \
      -o /usr/local/bin/sops \
    && chmod +x /usr/local/bin/sops

# kubectl — verified against the .sha256 file published next to the binary.
RUN curl -fsSL "https://dl.k8s.io/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl" \
      -o /usr/local/bin/kubectl \
    && curl -fsSL "https://dl.k8s.io/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl.sha256" \
      -o /tmp/kubectl.sha256 \
    && echo "$(cat /tmp/kubectl.sha256)  /usr/local/bin/kubectl" | sha256sum -c - \
    && rm -f /tmp/kubectl.sha256 \
    && chmod +x /usr/local/bin/kubectl

# Provider cache directory (shared across stacks)
ENV TF_PLUGIN_CACHE_DIR=/tmp/terraform-plugin-cache
ENV TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE=1
RUN mkdir -p /tmp/terraform-plugin-cache

WORKDIR /workspace
|
||||
51
scripts/tg
51
scripts/tg
|
|
@ -1,23 +1,61 @@
|
|||
#!/usr/bin/env bash
|
||||
# scripts/tg — wrapper: decrypt state before, encrypt+commit after mutating ops
|
||||
# Usage: scripts/tg apply --non-interactive
|
||||
# scripts/tg run --all -- plan
|
||||
# scripts/tg plan
|
||||
# Auth: `vault login -method=oidc` (token at ~/.vault-token)
|
||||
set -euo pipefail
|
||||
|
||||
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||
SYNC="$REPO_ROOT/scripts/state-sync"
|
||||
|
||||
# Enable provider cache (shared across stacks)
|
||||
export TF_PLUGIN_CACHE_DIR="${TF_PLUGIN_CACHE_DIR:-$HOME/.terraform.d/plugin-cache}"
|
||||
export TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE=1
|
||||
mkdir -p "$TF_PLUGIN_CACHE_DIR"
|
||||
|
||||
# Determine stack name from cwd (relative to stacks/).
# STACK_NAME stays empty when the script is run from outside $REPO_ROOT/stacks.
STACK_NAME=""
cwd="$(pwd)"
stacks_dir="$REPO_ROOT/stacks"
if [[ "$cwd" == "$stacks_dir"/* ]]; then
  # Strip the stacks/ prefix, then keep only the first path component.
  # "$stacks_dir" is quoted inside the expansion so it is removed as a
  # literal prefix; unquoted it would be interpreted as a glob pattern and
  # mis-strip when the repo path contains characters such as [ ] * ?.
  rel="${cwd#"$stacks_dir"/}"
  STACK_NAME="${rel%%/*}"
fi
|
||||
|
||||
# ── Advisory lock via Vault KV ──
|
||||
LOCK_MAX_AGE=1800 # 30 minutes — stale lock threshold
|
||||
#######################################
# Take the advisory lock for a stack in Vault KV (secret/locks/<stack>).
# A lock younger than the stale threshold blocks; an older one is broken
# with a warning and replaced.
# Globals:   LOCK_MAX_AGE (read) - stale threshold in seconds, default 1800
# Arguments: $1 - stack name
# Outputs:   diagnostics on stderr
# Returns:   0 when the lock was written, 1 when the stack is locked by a
#            live holder, otherwise the status of `vault kv put`
# NOTE(review): the read and the write are two separate Vault calls, so two
# callers racing past the check can both "acquire". This is advisory only.
#######################################
acquire_lock() {
  local stack="$1"
  local max_age="${LOCK_MAX_AGE:-1800}"
  local holder existing locked acquired existing_holder now age

  # Declarations are kept separate from assignments so a failing command
  # substitution is not hidden behind `local`'s own exit status.
  holder="pid=$$,host=$(hostname -s 2>/dev/null || echo unknown),user=$(whoami 2>/dev/null || echo unknown)"

  # A failed read (no such key, or any other error) is treated as "no lock";
  # a real Vault problem will surface in the `vault kv put` below.
  existing=$(vault kv get -format=json "secret/locks/$stack" 2>/dev/null || echo '{}')
  locked=$(jq -r '.data.data.locked // "false"' <<<"$existing" 2>/dev/null) || locked="false"
  acquired=$(jq -r '.data.data.acquired // "0"' <<<"$existing" 2>/dev/null) || acquired="0"
  existing_holder=$(jq -r '.data.data.holder // ""' <<<"$existing" 2>/dev/null) || existing_holder=""

  # A missing or non-numeric timestamp would break the arithmetic below
  # (fatal under `set -u`); treat it as epoch 0, i.e. as a stale lock.
  case "$acquired" in
    '' | *[!0-9]*) acquired=0 ;;
  esac

  if [ "$locked" = "true" ]; then
    now=$(date +%s)
    age=$((now - 10#$acquired))
    if [ "$age" -lt "$max_age" ]; then
      echo "ERROR: Stack '$stack' is locked by: $existing_holder (${age}s ago)" >&2
      echo "  Wait for it to finish or run: vault kv delete secret/locks/$stack" >&2
      return 1
    fi
    echo "WARNING: Breaking stale lock on '$stack' (held ${age}s by $existing_holder)" >&2
  fi

  vault kv put "secret/locks/$stack" locked=true holder="$holder" acquired="$(date +%s)" >/dev/null
}
|
||||
|
||||
#######################################
# Drop the advisory lock for a stack by deleting secret/locks/<stack>.
# Best-effort: output is discarded and a failed delete is ignored, so this
# always returns 0.
# Arguments: $1 - stack name
#######################################
release_lock() {
  local stack_name="$1"
  if ! vault kv delete "secret/locks/$stack_name" >/dev/null 2>&1; then
    return 0
  fi
}
|
||||
|
||||
# Decrypt state before any operation
|
||||
if [ -n "$STACK_NAME" ] && [ -f "$REPO_ROOT/state/stacks/$STACK_NAME/terraform.tfstate.enc" ]; then
|
||||
"$SYNC" decrypt "$STACK_NAME"
|
||||
|
|
@ -31,6 +69,14 @@ for arg in "$@"; do
|
|||
esac
|
||||
done
|
||||
|
||||
# Acquire lock for mutating operations.
# The lock is only attempted when the vault CLI is installed and a token is
# available: either exported as VAULT_TOKEN, or stored in ~/.vault-token by
# `vault login` (the auth flow described in the header of this script).
# Checking VAULT_TOKEN alone would silently skip the lock for that flow.
if $is_mutating && [ -n "$STACK_NAME" ]; then
  if command -v vault &>/dev/null \
    && { [ -n "${VAULT_TOKEN:-}" ] || [ -s "${HOME:-}/.vault-token" ]; }; then
    acquire_lock "$STACK_NAME"
    # Installed only after a successful acquire, so a refused lock (held by
    # someone else) is never deleted by our own exit handler.
    trap 'release_lock "$STACK_NAME"' EXIT
  fi
fi
|
||||
|
||||
# If running apply with --non-interactive, add -auto-approve for Terraform
|
||||
args=("$@")
|
||||
has_apply=false
|
||||
|
|
@ -43,7 +89,6 @@ for arg in "${args[@]}"; do
|
|||
done
|
||||
|
||||
if $has_apply && $has_non_interactive; then
|
||||
# Rebuild args: insert -auto-approve after apply
|
||||
new_args=()
|
||||
for arg in "${args[@]}"; do
|
||||
new_args+=("$arg")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue