#!/usr/bin/env bash
# scripts/tg — wrapper: decrypt state before, encrypt+commit after mutating ops
#
# Usage: scripts/tg apply --non-interactive
#        scripts/tg plan
# Auth:  `vault login -method=oidc` (token at ~/.vault-token)
#
# Environment:
#   TF_PLUGIN_CACHE_DIR  provider cache dir (default: ~/.terraform.d/plugin-cache)
#   PG_CONN_STR          if set, used as-is; otherwise built from Vault creds
#   VAULT_TOKEN          required for the advisory lock to be taken (see below)
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
SYNC="$REPO_ROOT/scripts/state-sync"

# Enable provider cache (shared across stacks)
export TF_PLUGIN_CACHE_DIR="${TF_PLUGIN_CACHE_DIR:-$HOME/.terraform.d/plugin-cache}"
export TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE=1
mkdir -p "$TF_PLUGIN_CACHE_DIR"

# Determine stack name from cwd (relative to stacks/): the first path component
# below $REPO_ROOT/stacks. Stays empty when run from anywhere else.
STACK_NAME=""
cwd="$(pwd)"
stacks_dir="$REPO_ROOT/stacks"
if [[ "$cwd" == "$stacks_dir"/* ]]; then
  # Quote the prefix so glob characters in the repo path are matched literally.
  rel="${cwd#"$stacks_dir"/}"
  STACK_NAME="${rel%%/*}"
fi

# ── Tier detection ──
TIER0_STACKS="infra platform cnpg vault dbaas external-secrets"

# is_tier0 STACK
# Returns 0 if STACK is exactly one of the space-separated names in
# TIER0_STACKS, 1 otherwise. The comparison is a literal string match.
is_tier0() {
  local wanted="$1"
  local -a tier0_names
  local name
  read -r -a tier0_names <<<"$TIER0_STACKS"
  for name in "${tier0_names[@]}"; do
    if [[ "$name" == "$wanted" ]]; then
      return 0
    fi
  done
  return 1
}

# ── Advisory lock via Vault KV ──
LOCK_MAX_AGE=1800 # 30 minutes — stale lock threshold

# acquire_lock STACK
# Writes a lock record (locked/holder/acquired) to secret/locks/STACK.
# Returns 1 without writing if a lock younger than LOCK_MAX_AGE seconds is
# already present, or if the existing record cannot be parsed. A lock older
# than LOCK_MAX_AGE is overwritten with a warning.
# The read and the write are separate Vault calls, so the lock is advisory only.
# NOTE(review): VAULT_ADDR is not defaulted here; the vault CLI uses whatever
# the caller's environment provides.
acquire_lock() {
  local stack="$1"
  local holder
  holder="pid=$$,host=$(hostname -s),user=$(whoami)"

  # A failed read (no lock, no access) is treated as "no lock present".
  local existing
  existing=$(vault kv get -format=json "secret/locks/$stack" 2>/dev/null || echo '{}')

  # Fail closed if the record cannot be parsed, rather than assuming "unlocked".
  local locked acquired existing_holder
  if ! locked=$(jq -r '.data.data.locked // "false"' <<<"$existing") ||
    ! acquired=$(jq -r '.data.data.acquired // "0"' <<<"$existing") ||
    ! existing_holder=$(jq -r '.data.data.holder // ""' <<<"$existing"); then
    echo "ERROR: Cannot parse lock record for stack '$stack' (is jq installed?)" >&2
    return 1
  fi

  # A malformed timestamp is handled like a missing one (0 → lock is stale).
  if [[ ! "$acquired" =~ ^[0-9]+$ ]]; then
    acquired=0
  fi

  if [ "$locked" = "true" ]; then
    local now age
    now=$(date +%s)
    age=$((now - acquired))
    if [ "$age" -lt "$LOCK_MAX_AGE" ]; then
      echo "ERROR: Stack '$stack' is locked by: $existing_holder (${age}s ago)" >&2
      echo " Wait for it to finish or run: vault kv delete secret/locks/$stack" >&2
      return 1
    fi
    echo "WARNING: Breaking stale lock on '$stack' (held ${age}s by $existing_holder)" >&2
  fi

  vault kv put "secret/locks/$stack" locked=true holder="$holder" acquired="$(date +%s)" >/dev/null
}

# release_lock STACK
# Best-effort delete of secret/locks/STACK; errors are deliberately ignored so
# the EXIT trap never changes the script's exit status.
release_lock() {
  local stack="$1"
  vault kv delete "secret/locks/$stack" >/dev/null 2>&1 || true
}

# ── Pre-flight: decrypt state (Tier 0) or fetch PG creds (Tier 1) ──
if [ -n "$STACK_NAME" ]; then
  if is_tier0 "$STACK_NAME"; then
    # Tier 0: SOPS-encrypted local state
    if [ -f "$REPO_ROOT/state/stacks/$STACK_NAME/terraform.tfstate.enc" ]; then
      "$SYNC" decrypt "$STACK_NAME"
    fi
  else
    # Tier 1: PG backend — fetch credentials from Vault
    if [ -z "${PG_CONN_STR:-}" ]; then
      # Pre-flight: vault CLI must be available. Previously CI failed with a
      # misleading "Cannot read PG credentials" message because the Alpine CI
      # image lacked the vault binary — the error was swallowed. Fail fast
      # with a clear message instead.
      if ! command -v vault >/dev/null 2>&1; then
        echo "ERROR: vault CLI not found on PATH. Install it or use an image that includes it (ci/Dockerfile)." >&2
        exit 1
      fi
      VAULT_OUT=$(vault read -format=json database/static-creds/pg-terraform-state 2>&1) || {
        echo "ERROR: Cannot read PG credentials from Vault. Vault output follows:" >&2
        echo "$VAULT_OUT" >&2
        echo "" >&2
        echo "Hint: humans run 'vault login -method=oidc'; CI auths via K8s SA (role=ci)." >&2
        exit 1
      }
      # Reject missing fields (otherwise the literal string "null" would end up
      # in the connection string) and percent-encode both values so characters
      # that are reserved in a URI cannot corrupt it.
      if ! PG_USER=$(jq -er '(.data.username // empty) | @uri' <<<"$VAULT_OUT") ||
        ! PG_PASS=$(jq -er '(.data.password // empty) | @uri' <<<"$VAULT_OUT"); then
        echo "ERROR: Vault response is missing .data.username/.data.password or is not valid JSON." >&2
        exit 1
      fi
      export PG_CONN_STR="postgres://${PG_USER}:${PG_PASS}@10.0.20.200:5432/terraform_state?sslmode=disable"
    fi
  fi
fi

# Single pass over the arguments:
#   is_mutating          — any of apply/destroy/import/state appears
#   has_apply            — "apply" appears
#   has_non_interactive  — "--non-interactive" appears
# Iterating "$@" directly (instead of a copied array) stays safe under `set -u`
# on Bash versions that reject expanding an empty array.
is_mutating=false
has_apply=false
has_non_interactive=false
for arg in "$@"; do
  case "$arg" in
    apply)
      is_mutating=true
      has_apply=true
      ;;
    destroy | import | state) is_mutating=true ;;
    --non-interactive) has_non_interactive=true ;;
  esac
done

# Acquire lock for mutating operations (Tier 0 only — Tier 1 uses pg_advisory_lock)
# NOTE(review): the lock is skipped when VAULT_TOKEN is not exported, which
# looks like it includes users whose token lives only in ~/.vault-token —
# confirm whether that is intended.
if $is_mutating && [ -n "$STACK_NAME" ] && is_tier0 "$STACK_NAME"; then
  if command -v vault &>/dev/null && [ -n "${VAULT_TOKEN:-}" ]; then
    acquire_lock "$STACK_NAME"
    trap 'release_lock "$STACK_NAME"' EXIT
  fi
fi

# If running apply with --non-interactive, add -auto-approve for Terraform
# NOTE(review): under `set -e` a failing terragrunt run exits here, so the
# encrypt+commit step below is skipped even if the run changed state — confirm
# that state-sync copes with that on the next invocation.
if $has_apply && $has_non_interactive; then
  new_args=()
  for arg in "$@"; do
    new_args+=("$arg")
    if [ "$arg" = "apply" ]; then
      new_args+=("-auto-approve")
    fi
  done
  terragrunt "${new_args[@]}"
else
  terragrunt "$@"
fi

# After mutating operations: encrypt+commit (Tier 0) or no-op (Tier 1 — PG is authoritative)
if $is_mutating && [ -n "$STACK_NAME" ] && is_tier0 "$STACK_NAME"; then
  "$SYNC" encrypt "$STACK_NAME"
  cd "$REPO_ROOT"
  state_file="state/stacks/$STACK_NAME/terraform.tfstate.enc"
  git add -- "$state_file"
  # Restrict both the check and the commit to the state file so unrelated
  # changes already staged in the index are neither counted nor committed.
  if ! git diff --cached --quiet -- "$state_file"; then
    git commit -m "state($STACK_NAME): update encrypted state" -- "$state_file"
  fi
fi