From 0f262ceda3f54e33a472bb8e3583df8f8e7f55a0 Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Sun, 15 Mar 2026 19:17:44 +0000
Subject: [PATCH] add pod dependency management via Kyverno init container injection

Kyverno ClusterPolicy reads dependency.kyverno.io/wait-for annotation
and injects busybox init containers that block until each dependency is
reachable (nc -z). Annotations added to 18 stacks (24 deployments).

Includes graceful-db-maintenance.sh script for planned DB maintenance
(scales dependents to 0, saves replica counts, restores on startup).
---
 scripts/graceful-db-maintenance.sh            | 222 ++++++++++++++++++
 stacks/affine/main.tf                         |   3 +
 stacks/claude-memory/main.tf                  |   3 +
 stacks/dawarich/main.tf                       |   1 +
 stacks/grampsweb/main.tf                      |   3 +
 stacks/hackmd/main.tf                         |   3 +
 stacks/health/main.tf                         |   3 +
 stacks/immich/frame.tf                        |   3 +
 stacks/linkwarden/main.tf                     |   5 +-
 stacks/matrix/main.tf                         |   3 +
 stacks/n8n/main.tf                            |   3 +
 stacks/nextcloud/chart_values.yaml            |   1 +
 stacks/ollama/main.tf                         |   3 +
 stacks/onlyoffice/main.tf                     |   3 +
 stacks/paperless-ngx/main.tf                  |   5 +-
 .../kyverno/dependency-init-containers.tf     |  72 +++++++++
 .../monitoring/grafana_chart_values.yaml      |   2 +
 stacks/real-estate-crawler/main.tf            |   9 ++
 stacks/rybbit/main.tf                         |   6 +
 stacks/speedtest/main.tf                      |   3 +
 stacks/tandoor/main.tf                        |   3 +
 stacks/trading-bot/main.tf                    |   6 +
 22 files changed, 361 insertions(+), 4 deletions(-)
 create mode 100755 scripts/graceful-db-maintenance.sh
 create mode 100644 stacks/platform/modules/kyverno/dependency-init-containers.tf

diff --git a/scripts/graceful-db-maintenance.sh b/scripts/graceful-db-maintenance.sh
new file mode 100755
index 00000000..21e43941
--- /dev/null
+++ b/scripts/graceful-db-maintenance.sh
@@ -0,0 +1,222 @@
+#!/usr/bin/env bash
+# graceful-db-maintenance.sh — Scale down/up dependents of a service
+# based on the dependency.kyverno.io/wait-for pod annotation.
+#
+# Usage:
+#   ./scripts/graceful-db-maintenance.sh shutdown mysql.dbaas
+#   # ... perform maintenance ...
+#   ./scripts/graceful-db-maintenance.sh startup mysql.dbaas
+#
+# The shutdown action saves original replica counts to a state file
+# so startup can restore them exactly.
+#
+# Environment:
+#   KUBECONFIG  kubeconfig to use (default: <script dir>/../config)
+#   STATE_DIR   directory that holds the state file (default: /tmp)
+#
+# Requires kubectl and jq on PATH.
+
+set -euo pipefail
+
+ACTION="${1:-}"
+SERVICE="${2:-}"
+STATE_DIR="${STATE_DIR:-/tmp}"
+
+# Print the usage text to stderr and exit with status 1.
+usage() {
+  cat >&2 <<EOF
+Usage: $0 <shutdown|startup> <service.namespace>
+
+Examples:
+  $0 shutdown mysql.dbaas       # Scale down all MySQL dependents
+  $0 startup mysql.dbaas        # Restore all MySQL dependents
+  $0 shutdown postgresql.dbaas  # Scale down all PostgreSQL dependents
+  $0 shutdown redis.redis       # Scale down all Redis dependents
+EOF
+  exit 1
+}
+
+if [[ -z "$ACTION" || -z "$SERVICE" ]]; then
+  usage
+fi
+if [[ "$ACTION" != "shutdown" && "$ACTION" != "startup" ]]; then
+  usage
+fi
+
+for tool in kubectl jq; do
+  if ! command -v "$tool" >/dev/null 2>&1; then
+    echo "Error: required tool '$tool' not found in PATH" >&2
+    exit 1
+  fi
+done
+
+# Characters that are awkward in a file name become '-', so mysql.dbaas is
+# stored as dep-maintenance-mysql-dbaas.json.
+STATE_FILE="${STATE_DIR}/dep-maintenance-${SERVICE//[^A-Za-z0-9_-]/-}.json"
+KUBECONFIG="${KUBECONFIG:-$(dirname "$0")/../config}"
+export KUBECONFIG
+
+# Succeed when a wait-for annotation value references the given service.
+# Arguments: $1 - annotation value, e.g. "mysql.dbaas:3306,redis.redis:6379"
+#            $2 - service to look for, e.g. "mysql.dbaas"
+# An entry matches when it equals the service, or continues it with ":port"
+# or with further DNS labels (".svc.cluster.local").
+annotation_has_service() {
+  local entries=",${1//[[:space:]]/},"
+  local service="$2"
+  [[ "$entries" == *",${service},"* ||
+    "$entries" == *",${service}:"* ||
+    "$entries" == *",${service}."* ]]
+}
+
+# List the scalable owners of every pod whose wait-for annotation references
+# the given service.
+# Arguments: $1 - service name as written in the annotation
+# Outputs:   unique "Kind/name/namespace" lines on stdout
+# Returns:   non-zero when the pod list cannot be fetched
+find_dependent_owners() {
+  local service="$1"
+  local pods ns annotation owner_kind owner_name rs_owner
+
+  # kubectl runs outside the pipeline and keeps its stderr, so an unreachable
+  # cluster is an error rather than an empty result. Fields are separated by
+  # '|' because read would collapse empty tab-separated fields.
+  pods=$(kubectl get pods --all-namespaces \
+    -o jsonpath='{range .items[*]}{.metadata.namespace}{"|"}{.metadata.annotations.dependency\.kyverno\.io/wait-for}{"|"}{.metadata.ownerReferences[0].kind}{"|"}{.metadata.ownerReferences[0].name}{"\n"}{end}') ||
+    return 1
+
+  while IFS='|' read -r ns annotation owner_kind owner_name; do
+    [[ -n "$owner_kind" && -n "$owner_name" ]] || continue
+    annotation_has_service "$annotation" "$service" || continue
+    case "$owner_kind" in
+      ReplicaSet)
+        # Pods of a Deployment are owned by a ReplicaSet: go one level up and
+        # keep the result only when that owner really is a Deployment.
+        rs_owner=$(kubectl get replicaset "$owner_name" -n "$ns" \
+          -o jsonpath='{.metadata.ownerReferences[0].kind}{"/"}{.metadata.ownerReferences[0].name}' \
+          2>/dev/null </dev/null || true)
+        if [[ "$rs_owner" == Deployment/?* ]]; then
+          echo "${rs_owner}/${ns}"
+        fi
+        ;;
+      StatefulSet)
+        echo "StatefulSet/${owner_name}/${ns}"
+        ;;
+    esac
+  done <<< "$pods" | sort -u
+}
+
+# Record the replica count of every dependent, then scale each one to zero.
+do_shutdown() {
+  local owners kind name ns replicas state
+
+  echo "Finding dependents of $SERVICE..."
+  if ! owners=$(find_dependent_owners "$SERVICE"); then
+    echo "Error: could not list pods; nothing was changed." >&2
+    exit 1
+  fi
+
+  if [[ -z "$owners" ]]; then
+    echo "No dependents found for $SERVICE"
+    exit 0
+  fi
+
+  echo "Dependents found:"
+  while IFS='/' read -r kind name ns; do
+    echo "  $ns/$kind/$name"
+  done <<< "$owners"
+
+  # Start from the state of an earlier, unfinished shutdown (if any) so the
+  # counts recorded then survive a re-run that only sees the remainder.
+  state="[]"
+  if [[ -s "$STATE_FILE" ]]; then
+    echo "Merging with existing state in $STATE_FILE"
+    state=$(jq '.' "$STATE_FILE")
+  fi
+
+  while IFS='/' read -r kind name ns; do
+    replicas=$(kubectl get "$kind" "$name" -n "$ns" \
+      -o jsonpath='{.spec.replicas}' 2>/dev/null </dev/null || echo "1")
+    # A workload already at 0 keeps its recorded count; any other value
+    # replaces the old entry.
+    state=$(jq --arg kind "$kind" --arg name "$name" --arg ns "$ns" \
+      --argjson replicas "${replicas:-1}" '
+        def same: .kind == $kind and .name == $name and .namespace == $ns;
+        if $replicas == 0 and any(.[]; same) then .
+        else map(select(same | not))
+          + [{"kind": $kind, "name": $name, "namespace": $ns, "replicas": $replicas}]
+        end' <<< "$state")
+  done <<< "$owners"
+
+  printf '%s\n' "$state" > "$STATE_FILE"
+  echo "Saved replica state to $STATE_FILE"
+
+  while IFS='/' read -r kind name ns; do
+    echo "Scaling $ns/$kind/$name to 0..."
+    kubectl scale "$kind" "$name" -n "$ns" --replicas=0 </dev/null
+  done <<< "$owners"
+
+  echo ""
+  echo "Waiting for pods to terminate..."
+  while IFS='/' read -r kind name ns; do
+    # Best effort: a slow termination must not fail the whole shutdown.
+    kubectl rollout status "$kind" "$name" -n "$ns" --timeout=120s \
+      2>/dev/null </dev/null || true
+  done <<< "$owners"
+
+  echo ""
+  echo "All dependents of $SERVICE scaled to 0."
+  echo "Run '$0 startup $SERVICE' after maintenance to restore."
+}
+
+# Scale every workload in the state file back to its recorded replica count.
+# The state file is removed only when every workload was scaled successfully.
+do_startup() {
+  local rows kind name ns replicas
+  local failed=0
+
+  if [[ ! -f "$STATE_FILE" ]]; then
+    echo "Error: No state file found at $STATE_FILE" >&2
+    echo "Did you run '$0 shutdown $SERVICE' first?" >&2
+    exit 1
+  fi
+
+  echo "Restoring dependents of $SERVICE from $STATE_FILE..."
+
+  # One jq pass yields a "kind name namespace replicas" row per workload.
+  rows=$(jq -r '.[] | "\(.kind) \(.name) \(.namespace) \(.replicas)"' "$STATE_FILE")
+
+  while read -r kind name ns replicas; do
+    [[ -n "$kind" ]] || continue
+    echo "Scaling $ns/$kind/$name to $replicas..."
+    if ! kubectl scale "$kind" "$name" -n "$ns" --replicas="$replicas" </dev/null; then
+      echo "Warning: could not scale $ns/$kind/$name" >&2
+      failed=1
+    fi
+  done <<< "$rows"
+
+  echo ""
+  echo "Waiting for rollouts..."
+  while read -r kind name ns replicas; do
+    [[ -n "$kind" ]] || continue
+    # Best effort: a rollout that exceeds the timeout is not fatal.
+    kubectl rollout status "$kind" "$name" -n "$ns" --timeout=300s \
+      2>/dev/null </dev/null || true
+  done <<< "$rows"
+
+  if ((failed)); then
+    echo "" >&2
+    echo "Some dependents could not be restored; keeping $STATE_FILE for a retry." >&2
+    exit 1
+  fi
+
+  rm -f -- "$STATE_FILE"
+  echo ""
+  echo "All dependents of $SERVICE restored."
+}
+
+case "$ACTION" in
+  shutdown) do_shutdown ;;
+  startup) do_startup ;;
+  *) usage ;;
+esac
diff --git a/stacks/affine/main.tf b/stacks/affine/main.tf index 303d83a1..5b464e30 100644 --- a/stacks/affine/main.tf +++ b/stacks/affine/main.tf @@ -112,6 +112,9 @@ resource "kubernetes_deployment" "affine" { labels = { app = "affine" } + annotations = { + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432,redis.redis:6379" + } } spec { # Init container to run database migrations diff --git a/stacks/claude-memory/main.tf b/stacks/claude-memory/main.tf index 22ae49d7..2825c049 100644 --- a/stacks/claude-memory/main.tf +++ b/stacks/claude-memory/main.tf @@ -117,6 +117,9 @@ resource "kubernetes_deployment" "claude-memory" { labels = { app = "claude-memory" } + annotations = { + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432" + } } spec { affinity { diff --git a/stacks/dawarich/main.tf b/stacks/dawarich/main.tf index 9c2897f3..5874e6ad 100644 --- a/stacks/dawarich/main.tf +++ b/stacks/dawarich/main.tf @@ -84,6 +84,7 @@ resource "kubernetes_deployment" "dawarich" { annotations = { # "diun.enable" = "true" # "diun.include_tags" = "latest" + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432,redis.redis:6379" } } spec { diff --git a/stacks/grampsweb/main.tf b/stacks/grampsweb/main.tf index afb70b0c..6ecf3163 100644 --- a/stacks/grampsweb/main.tf +++ b/stacks/grampsweb/main.tf @@ -137,6 +137,9 @@ resource "kubernetes_deployment" "grampsweb" { labels = { app = "grampsweb" } + annotations = { + "dependency.kyverno.io/wait-for" = "redis.redis:6379" + } } spec { container { diff --git a/stacks/hackmd/main.tf b/stacks/hackmd/main.tf
index e221c238..281b29c6 100644 --- a/stacks/hackmd/main.tf +++ b/stacks/hackmd/main.tf @@ -57,6 +57,9 @@ resource "kubernetes_deployment" "hackmd" { app = "hackmd" "kubernetes.io/cluster-service" = "true" } + annotations = { + "dependency.kyverno.io/wait-for" = "mysql.dbaas:3306" + } } spec { # container { diff --git a/stacks/health/main.tf b/stacks/health/main.tf index e12aa7b5..24ce403a 100644 --- a/stacks/health/main.tf +++ b/stacks/health/main.tf @@ -52,6 +52,9 @@ resource "kubernetes_deployment" "health" { labels = { app = "health" } + annotations = { + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432" + } } spec { container { diff --git a/stacks/immich/frame.tf b/stacks/immich/frame.tf index 3a0e60ed..a6fa0098 100644 --- a/stacks/immich/frame.tf +++ b/stacks/immich/frame.tf @@ -58,6 +58,9 @@ resource "kubernetes_deployment" "immich-frame" { labels = { app = "immich-frame" } + annotations = { + "dependency.kyverno.io/wait-for" = "immich-server.immich:2283" + } } spec { container { diff --git a/stacks/linkwarden/main.tf b/stacks/linkwarden/main.tf index 359b1aca..6a406412 100644 --- a/stacks/linkwarden/main.tf +++ b/stacks/linkwarden/main.tf @@ -87,8 +87,9 @@ resource "kubernetes_deployment" "linkwarden" { app = "linkwarden" } annotations = { - "diun.enable" = "false" - "diun.include_tags" = "latest" + "diun.enable" = "false" + "diun.include_tags" = "latest" + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432" } } spec { diff --git a/stacks/matrix/main.tf b/stacks/matrix/main.tf index 97135455..2f17b123 100644 --- a/stacks/matrix/main.tf +++ b/stacks/matrix/main.tf @@ -50,6 +50,9 @@ resource "kubernetes_deployment" "matrix" { labels = { app = "matrix" } + annotations = { + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432" + } } spec { init_container { diff --git a/stacks/n8n/main.tf b/stacks/n8n/main.tf index 2cd0f722..ea43d762 100644 --- a/stacks/n8n/main.tf +++ b/stacks/n8n/main.tf @@ -122,6 +122,9 @@ resource 
"kubernetes_deployment" "n8n" { labels = { app = "n8n" } + annotations = { + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432" + } } spec { service_account_name = kubernetes_service_account.n8n.metadata[0].name diff --git a/stacks/nextcloud/chart_values.yaml b/stacks/nextcloud/chart_values.yaml index f8421fb4..8007019e 100644 --- a/stacks/nextcloud/chart_values.yaml +++ b/stacks/nextcloud/chart_values.yaml @@ -98,6 +98,7 @@ readinessProbe: podAnnotations: diun.enable: "true" diun.include_tags: "^[0-9]+(?:.[0-9]+)?(?:.[0-9]+)?.*" + dependency.kyverno.io/wait-for: "mysql.dbaas:3306,redis.redis:6379" collabora: enabled: false # Using onlyoffice instead diff --git a/stacks/ollama/main.tf b/stacks/ollama/main.tf index e095bd60..6ab444c7 100644 --- a/stacks/ollama/main.tf +++ b/stacks/ollama/main.tf @@ -248,6 +248,9 @@ resource "kubernetes_deployment" "ollama-ui" { labels = { app = "ollama-ui" } + annotations = { + "dependency.kyverno.io/wait-for" = "ollama.ollama:11434" + } } spec { container { diff --git a/stacks/onlyoffice/main.tf b/stacks/onlyoffice/main.tf index c779051b..3643b454 100644 --- a/stacks/onlyoffice/main.tf +++ b/stacks/onlyoffice/main.tf @@ -123,6 +123,9 @@ resource "kubernetes_deployment" "onlyoffice-document-server" { labels = { app = "onlyoffice-document-server" } + annotations = { + "dependency.kyverno.io/wait-for" = "mysql.dbaas:3306,redis.redis:6379" + } } spec { container { diff --git a/stacks/paperless-ngx/main.tf b/stacks/paperless-ngx/main.tf index 9449320d..eb6dd336 100644 --- a/stacks/paperless-ngx/main.tf +++ b/stacks/paperless-ngx/main.tf @@ -97,8 +97,9 @@ resource "kubernetes_deployment" "paperless-ngx" { app = "paperless-ngx" } annotations = { - "diun.enable" = "false" - "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$" + "diun.enable" = "false" + "diun.include_tags" = "^\\d+(?:\\.\\d+)?(?:\\.\\d+)?$" + "dependency.kyverno.io/wait-for" = "mysql.dbaas:3306,redis.redis:6379" } } spec { diff --git 
a/stacks/platform/modules/kyverno/dependency-init-containers.tf b/stacks/platform/modules/kyverno/dependency-init-containers.tf new file mode 100644 index 00000000..ffe24780 --- /dev/null +++ b/stacks/platform/modules/kyverno/dependency-init-containers.tf @@ -0,0 +1,72 @@ + +# ============================================================================= +# Pod Dependency Init Container Injection +# ============================================================================= +# Reads the annotation dependency.kyverno.io/wait-for from pods and injects +# init containers that wait for each listed dependency to be reachable. +# +# Usage: +# annotations: +# dependency.kyverno.io/wait-for: "postgresql.dbaas:5432,redis.redis:6379" +# +# Each comma-separated entry becomes a busybox init container that runs +# `nc -z <host> <port>` in a loop (2-second pause between attempts) until the dependency is reachable. +# Existing init containers are preserved — Kyverno appends to the array. + +resource "kubernetes_manifest" "inject_dependency_init_containers" { + manifest = { + apiVersion = "kyverno.io/v1" + kind = "ClusterPolicy" + metadata = { + name = "inject-dependency-init-containers" + annotations = { + "policies.kyverno.io/title" = "Inject Dependency Init Containers" + "policies.kyverno.io/description" = "Injects wait-for init containers based on dependency.kyverno.io/wait-for pod annotation. Each comma-separated host:port entry becomes a busybox init container that blocks until the dependency is reachable via nc -z." 
+ } + } + spec = { + rules = [ + { + name = "wait-for-dependencies" + match = { + any = [ + { + resources = { + kinds = ["Pod"] + operations = ["CREATE"] + } + } + ] + } + preconditions = { + all = [ + { + key = "{{ request.object.metadata.annotations.\"dependency.kyverno.io/wait-for\" || '' }}" + operator = "NotEquals" + value = "" + } + ] + } + mutate = { + foreach = [ + { + list = "request.object.metadata.annotations.\"dependency.kyverno.io/wait-for\" | split(@, ',')" + patchStrategicMerge = { + spec = { + initContainers = [ + { + name = "wait-for-{{ element | split(@, ':') | [0] | replace_all(@, '.', '-') }}" + image = "busybox:1.37" + command = ["sh", "-c", "until nc -z {{ element | split(@, ':') | [0] }} {{ element | split(@, ':') | [1] }}; do echo waiting for {{ element }}; sleep 2; done"] + } + ] + } + } + } + ] + } + } + ] + } + } +} diff --git a/stacks/platform/modules/monitoring/grafana_chart_values.yaml b/stacks/platform/modules/monitoring/grafana_chart_values.yaml index a5bc4222..64aac761 100644 --- a/stacks/platform/modules/monitoring/grafana_chart_values.yaml +++ b/stacks/platform/modules/monitoring/grafana_chart_values.yaml @@ -18,6 +18,8 @@ topologySpreadConstraints: labelSelector: matchLabels: app.kubernetes.io/name: grafana +podAnnotations: + dependency.kyverno.io/wait-for: "mysql.dbaas:3306" podDisruptionBudget: maxUnavailable: 1 persistence: diff --git a/stacks/real-estate-crawler/main.tf b/stacks/real-estate-crawler/main.tf index 806d79e5..333f58c4 100644 --- a/stacks/real-estate-crawler/main.tf +++ b/stacks/real-estate-crawler/main.tf @@ -142,6 +142,9 @@ resource "kubernetes_deployment" "realestate-crawler-api" { app = "realestate-crawler-api" "kubernetes.io/cluster-service" = "true" } + annotations = { + "dependency.kyverno.io/wait-for" = "mysql.dbaas:3306,redis.redis:6379" + } } spec { container { @@ -316,6 +319,9 @@ resource "kubernetes_deployment" "realestate-crawler-celery" { labels = { app = "realestate-crawler-celery" } + annotations = 
{ + "dependency.kyverno.io/wait-for" = "mysql.dbaas:3306,redis.redis:6379" + } } spec { container { @@ -430,6 +436,9 @@ resource "kubernetes_deployment" "realestate-crawler-celery-beat" { labels = { app = "realestate-crawler-celery-beat" } + annotations = { + "dependency.kyverno.io/wait-for" = "mysql.dbaas:3306,redis.redis:6379" + } } spec { container { diff --git a/stacks/rybbit/main.tf b/stacks/rybbit/main.tf index 6a3bbfa2..47a4c161 100644 --- a/stacks/rybbit/main.tf +++ b/stacks/rybbit/main.tf @@ -252,6 +252,9 @@ resource "kubernetes_deployment" "rybbit" { labels = { app = "rybbit" } + annotations = { + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432,clickhouse.rybbit:8123" + } } spec { container { @@ -404,6 +407,9 @@ resource "kubernetes_deployment" "rybbit-client" { labels = { app = "rybbit-client" } + annotations = { + "dependency.kyverno.io/wait-for" = "rybbit.rybbit:3001" + } } spec { container { diff --git a/stacks/speedtest/main.tf b/stacks/speedtest/main.tf index 2c91942b..a45853b6 100644 --- a/stacks/speedtest/main.tf +++ b/stacks/speedtest/main.tf @@ -83,6 +83,9 @@ resource "kubernetes_deployment" "speedtest" { labels = { app = "speedtest" } + annotations = { + "dependency.kyverno.io/wait-for" = "mysql.dbaas:3306" + } } spec { container { diff --git a/stacks/tandoor/main.tf b/stacks/tandoor/main.tf index 24f321a8..a5c1b5f3 100644 --- a/stacks/tandoor/main.tf +++ b/stacks/tandoor/main.tf @@ -95,6 +95,9 @@ resource "kubernetes_deployment" "tandoor" { labels = { app = "tandoor" } + annotations = { + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432" + } } spec { container { diff --git a/stacks/trading-bot/main.tf b/stacks/trading-bot/main.tf index 28fd2fe1..8559d0f8 100644 --- a/stacks/trading-bot/main.tf +++ b/stacks/trading-bot/main.tf @@ -207,6 +207,9 @@ resource "kubernetes_deployment" "trading-bot-frontend" { labels = { app = "trading-bot-frontend" } + annotations = { + "dependency.kyverno.io/wait-for" = 
"postgresql.dbaas:5432,redis.redis:6379" + } } spec { container { @@ -299,6 +302,9 @@ resource "kubernetes_deployment" "trading-bot-workers" { labels = { app = "trading-bot-workers" } + annotations = { + "dependency.kyverno.io/wait-for" = "postgresql.dbaas:5432,redis.redis:6379" + } } spec { container {