infra/stacks/external-secrets/main.tf
Viktor Barzin 7cc9cde5b1
All checks were successful
ci/woodpecker/push/default Pipeline was successful
external-secrets: enable ESO Vault token cache to cut sdc write churn
Add --enable-vault-token-cache to the ESO controller (a graduated,
non-experimental flag in chart 2.6.0). Until now ESO authenticated to
Vault with login -> lookup-self -> revoke-self on *every* secret fetch.
Across 92 ExternalSecrets refreshing every 15m that measured ~0.22
logins/s + ~0.22 revoke-self/s on the active Vault member, and each
cycle is a token create+revoke (plus its lease) written to the Raft log
on all three members. Those fsync-heavy writes land on the contended
PVE RAID1 7200rpm HDD (sdc) -- one of the write sources behind the
recurring control-plane flaps (code-oflt write-reduction).

The eso kubernetes-auth role already issues a 240h periodic, unlimited-
use token, so the churn was pure waste: ESO discarded a perfectly good
token after a single use. With token caching ESO mints one token and
reuses/renews it, collapsing logins from ~13/min to a handful per token
lifetime. Verified live: vault cache initialized, 112/113 ExternalSecrets
Ready (the one failure, instagram-poster, is pre-existing data drift
unrelated to auth), logins dropped to ~0 after warm-up.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 15:32:37 +00:00

100 lines
3.1 KiB
HCL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Namespace that the External Secrets Operator Helm release in this stack is installed into.
resource "kubernetes_namespace" "external_secrets" {
  metadata {
    name = "external-secrets"

    labels = {
      "keel.sh/enrolled" = "true"
      # Tier value comes from the shared `local.tiers` map (declared outside this file).
      tier = local.tiers.cluster
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: the goldilocks-vpa-auto-mode ClusterPolicy stamps this label
    # on every namespace. Ignoring it keeps Terraform from reporting drift on (and
    # stripping) a label that is managed outside this configuration.
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }
}
# External Secrets Operator (ESO) controller, installed from the upstream Helm chart
# into the namespace declared by kubernetes_namespace.external_secrets.
resource "helm_release" "external_secrets" {
  name       = "external-secrets"
  repository = "https://charts.external-secrets.io"
  chart      = "external-secrets"
  namespace  = kubernetes_namespace.external_secrets.metadata[0].name

  # Pinned by the ESO 0.12->2.6 migration (2026-06-21, see
  # docs/plans/2026-06-21-eso-0.12-to-2.x-migration-design.md). The migration stepped one
  # minor version at a time on k8s 1.34, rewriting all 104 CRs v1beta1->v1 at 0.16.2
  # before moving to 0.17.
  version = "2.6.0"

  # Migration safety net: ESO previously had no rollback path, so a failed hop's helm
  # upgrade is now rolled back automatically, and the release waits (up to `timeout`)
  # for the controller Deployment to become Ready.
  atomic  = true
  timeout = 600

  values = [yamlencode({
    installCRDs = true

    # --enable-vault-token-cache (graduated in chart 2.6.0): ESO reuses ONE Vault token
    # (the `eso` k8s-auth role mints a 240h periodic token) instead of running
    # login + lookup + revoke on every secret fetch. This removes the dominant Vault Raft
    # write-churn on the contended PVE sdc HDD: the baseline was ~0.22 login/s plus
    # ~0.22 revoke-self/s across 92 ExternalSecrets refreshing every 15m, each cycle being
    # a token create+revoke (+lease) written on all 3 Vault members.
    # (code-oflt write-reduction)
    #
    # NOTE(review): the empty value is presumably rendered by the chart as a bare
    # `--enable-vault-token-cache` flag -- confirm against the chart's extraArgs template.
    extraArgs = { "enable-vault-token-cache" = "" }
  })]
}
# --- ClusterSecretStore: Vault KV v2 ---
# Cluster-wide store named "vault-kv" that points ESO at the Vault mount "secret"
# (store version "v2"), authenticating through Vault's Kubernetes auth method.
resource "kubernetes_manifest" "css_vault_kv" {
  manifest = {
    apiVersion = "external-secrets.io/v1"
    kind       = "ClusterSecretStore"

    metadata = {
      name = "vault-kv"
    }

    spec = {
      provider = {
        vault = {
          # Kubernetes auth: log in on mount "kubernetes" as role "eso", using the
          # service account referenced below.
          auth = {
            kubernetes = {
              role      = "eso"
              mountPath = "kubernetes"
              serviceAccountRef = {
                namespace = "external-secrets"
                name      = "external-secrets"
              }
            }
          }

          server  = "http://vault-active.vault.svc.cluster.local:8200"
          path    = "secret"
          version = "v2"
        }
      }
    }
  }

  # Applied only after the ESO Helm release, which is configured with installCRDs = true.
  depends_on = [helm_release.external_secrets]
}
# --- ClusterSecretStore: Vault Database Engine ---
# Cluster-wide store named "vault-database" that points ESO at the Vault mount "database"
# (store version "v1"), authenticating through Vault's Kubernetes auth method.
resource "kubernetes_manifest" "css_vault_db" {
  manifest = {
    apiVersion = "external-secrets.io/v1"
    kind       = "ClusterSecretStore"

    metadata = {
      name = "vault-database"
    }

    spec = {
      provider = {
        vault = {
          # Kubernetes auth: log in on mount "kubernetes" as role "eso", using the
          # service account referenced below.
          auth = {
            kubernetes = {
              role      = "eso"
              mountPath = "kubernetes"
              serviceAccountRef = {
                namespace = "external-secrets"
                name      = "external-secrets"
              }
            }
          }

          server  = "http://vault-active.vault.svc.cluster.local:8200"
          path    = "database"
          version = "v1"
        }
      }
    }
  }

  # Applied only after the ESO Helm release, which is configured with installCRDs = true.
  depends_on = [helm_release.external_secrets]
}