add vaultwarden daily backup CronJob to NFS

SQLite backup via Online Backup API + copy of RSA keys,
attachments, sends, and config. 30-day retention with rotation.
Pod affinity ensures co-scheduling with vaultwarden for RWO PVC access.
This commit is contained in:
Viktor Barzin 2026-03-15 00:03:59 +00:00 committed by Viktor Barzin
parent 3c622659d8
commit 9acbcc7718
127 changed files with 2521 additions and 413 deletions

View file

@@ -30,7 +30,7 @@ resource "helm_release" "kyverno" {
reportsController = {
resources = {
limits = {
memory = "128Mi"
memory = "256Mi"
}
requests = {
cpu = "100m"

View file

@@ -790,57 +790,6 @@ resource "kubernetes_manifest" "mutate_priority_from_tier" {
}
}
# --- GPU toleration for critical tiers ---
# Allows pods in tier-0-core and tier-1-cluster namespaces to overflow onto the
# GPU node during N-1 failures. Uses patchesJson6902 (not patchStrategicMerge)
# to APPEND the toleration without replacing existing tolerations.
resource "kubernetes_manifest" "mutate_gpu_toleration" {
  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"

    metadata = {
      name = "gpu-toleration-critical-tiers"
      annotations = {
        "policies.kyverno.io/title"       = "GPU Toleration for Critical Tiers"
        "policies.kyverno.io/description" = "Adds nvidia.com/gpu toleration to pods in tier-0-core and tier-1-cluster namespaces so they can overflow onto the GPU node during N-1 failures."
      }
    }

    spec = {
      # One rule is generated per tier label; the two rules differ only in
      # their name suffix and in the namespace label they select on.
      rules = [
        for tier_label in ["0-core", "1-cluster"] : {
          # The suffix is the part of the label before the first "-",
          # i.e. "0" for "0-core" and "1" for "1-cluster".
          name = "add-gpu-toleration-tier-${element(split("-", tier_label), 0)}"

          skipBackgroundRequests = true

          # Only Pod CREATE requests in namespaces labelled tier=<tier_label>.
          match = {
            any = [
              {
                resources = {
                  kinds      = ["Pod"]
                  operations = ["CREATE"]
                  namespaceSelector = {
                    matchLabels = {
                      tier = tier_label
                    }
                  }
                }
              }
            ]
          }

          # JSON Patch "add" with the "/-" suffix appends to the tolerations
          # array. The patch list is passed to Kyverno as a YAML string.
          mutate = {
            patchesJson6902 = yamlencode([
              {
                op   = "add"
                path = "/spec/tolerations/-"
                value = {
                  key      = "nvidia.com/gpu"
                  operator = "Exists"
                  effect   = "NoSchedule"
                }
              }
            ])
          }
        }
      ]
    }
  }
}
# --- ndots:2 injection ---
# Kubernetes defaults to ndots:5, which causes 4 wasted NxDomain queries per
@@ -909,90 +858,3 @@ resource "kubernetes_manifest" "mutate_ndots" {
}
}
# -----------------------------------------------------------------------------
# Layer 5: GPU Node Toleration for Critical Services (Kyverno Mutate)
# -----------------------------------------------------------------------------
# Adds nvidia.com/gpu toleration to pods in tier-0 and tier-1 namespaces.
# This allows critical infrastructure to overflow onto the GPU node (k8s-node1)
# during N-1 scenarios, giving the scheduler ~14 GiB extra capacity.
# GPU workloads won't be preempted; this just makes the node eligible.
resource "kubernetes_manifest" "mutate_gpu_toleration_critical" {
  # NOTE(review): another kubernetes_manifest visible in this view
  # ("mutate_gpu_toleration") declares a ClusterPolicy with the same
  # metadata.name. Confirm that only one of the two is meant to exist.
  manifest = {
    apiVersion = "kyverno.io/v1"
    kind       = "ClusterPolicy"
    metadata = {
      name = "gpu-toleration-critical-tiers"
      annotations = {
        "policies.kyverno.io/title"       = "GPU Toleration for Critical Tiers"
        "policies.kyverno.io/description" = "Adds nvidia.com/gpu toleration to pods in tier-0-core and tier-1-cluster namespaces so they can overflow onto the GPU node during N-1 failures."
      }
    }
    spec = {
      # One rule per tier label. This yields the same two rule names as the
      # previous hand-written list: add-gpu-toleration-tier-0 and
      # add-gpu-toleration-tier-1.
      rules = [
        for tier in ["0-core", "1-cluster"] : {
          name = "add-gpu-toleration-tier-${split("-", tier)[0]}"

          # Only Pod CREATE requests in namespaces labelled tier=<tier>.
          match = {
            any = [
              {
                resources = {
                  kinds      = ["Pod"]
                  operations = ["CREATE"]
                  namespaceSelector = {
                    matchLabels = {
                      tier = tier
                    }
                  }
                }
              }
            ]
          }

          # Append the toleration with a JSON Patch "add" on ".../-" instead
          # of patchStrategicMerge. A strategic-merge patch that sets
          # spec.tolerations replaces the pod's existing tolerations, whereas
          # the intent is to add one more entry.
          # NOTE(review): presumably spec.tolerations already exists on
          # incoming pods; verify behaviour for a pod with no tolerations.
          mutate = {
            patchesJson6902 = yamlencode([
              {
                op   = "add"
                path = "/spec/tolerations/-"
                value = {
                  key      = "nvidia.com/gpu"
                  operator = "Exists"
                  effect   = "NoSchedule"
                }
              }
            ])
          }
        }
      ]
    }
  }
}