right-size 14 services and scale down GPU-heavy workloads [ci skip]

Memory right-sizing based on VPA upperBound analysis:
- Increases: stirling-pdf 1200→1536Mi, claude-memory 64→128Mi,
  dawarich 512→768Mi, kyverno-cleanup 128→192Mi, linkwarden 768→1Gi,
  navidrome 64→128Mi, listenarr 768→896Mi, privatebin 64→128Mi,
  ntfy 64→128Mi, health 128→256Mi, dbaas quota 16→20Gi,
  mysql-operator 384→512Mi
- Decreases: rybbit 768→384Mi, dcgm-exporter 2560→1536Mi,
  nvidia-exporter now sets an explicit 192Mi memory request/limit
- Scale to 0: ebook2audiobook/audiblez-web, whisper (GPU node pressure)

Net effect: -496Mi cluster-wide, 13 ContainerNearOOM alerts resolved,
all ResourceQuota pressure cleared, GPU health green.
This commit is contained in:
Viktor Barzin 2026-03-15 23:00:49 +00:00
parent b6d619e5df
commit a04335d0f3
16 changed files with 36 additions and 32 deletions

View file

@ -35,8 +35,8 @@ resource "kubernetes_resource_quota" "dbaas" {
spec {
hard = {
"requests.cpu" = "8"
"requests.memory" = "16Gi"
"limits.memory" = "16Gi"
"requests.memory" = "20Gi"
"limits.memory" = "20Gi"
pods = "30"
}
}
@ -78,10 +78,10 @@ resource "helm_release" "mysql_operator" {
resources = {
requests = {
cpu = "100m"
memory = "384Mi"
memory = "512Mi"
}
limits = {
memory = "384Mi"
memory = "512Mi"
}
}
})]

View file

@ -57,11 +57,11 @@ resource "helm_release" "kyverno" {
cleanupController = {
resources = {
limits = {
memory = "128Mi"
memory = "192Mi"
}
requests = {
cpu = "100m"
memory = "128Mi"
memory = "192Mi"
}
}
}

View file

@ -132,7 +132,11 @@ resource "kubernetes_deployment" "nvidia-exporter" {
}
}
resources {
requests = {
memory = "192Mi"
}
limits = {
memory = "192Mi"
"nvidia.com/gpu" = "1"
}
}

View file

@ -18,13 +18,13 @@ driver:
config:
name: time-slicing-config
# DCGM Exporter - increased from 768Mi to 2560Mi based on VPA upper bound of 2091Mi
# DCGM Exporter - reduced from 2560Mi to 1536Mi based on VPA upper bound of 1459Mi (1.05x margin)
dcgmExporter:
resources:
requests:
memory: "2560Mi"
memory: "1536Mi"
limits:
memory: "2560Mi"
memory: "1536Mi"
# CUDA Validator - reduced from 1024Mi to 256Mi (one-shot job)
validator: