right-size memory requests to unblock GPU workloads and fix dbaas quota [ci skip]

- nvidia: custom LimitRange (128Mi default, was 1Gi from Kyverno) to stop
  inflating GPU operator init container requests; saves ~2.5Gi on GPU node
- nvidia: dcgm-exporter 1536Mi → 768Mi (actual usage 489Mi)
- monitoring: prometheus server 4Gi → 3Gi (actual usage 2.6Gi)
- onlyoffice: 2304Mi → 1536Mi (actual usage 1.3Gi)
- immich: frame gets explicit 64Mi resources (was getting 1Gi LimitRange default)
- dbaas: quota limits.memory 20Gi → 24Gi to fit 3rd MySQL replica

Root cause: the Kyverno tier-2-gpu LimitRange injected 1Gi into every NVIDIA
init container (none declare explicit resources), tying up ~2.5Gi of schedulable
memory on the GPU node. Combined with over-requesting, frigate and immich-ml
couldn't schedule.
This commit is contained in:
Viktor Barzin 2026-03-17 22:35:54 +00:00
parent 73511b1230
commit 12a51c4ffa
6 changed files with 46 additions and 11 deletions

View file

@ -12,8 +12,34 @@ resource "kubernetes_namespace" "nvidia" {
name = "nvidia"
labels = {
"istio-injection" : "disabled"
tier = var.tier
"resource-governance/custom-quota" = "true"
tier = var.tier
"resource-governance/custom-quota" = "true"
"resource-governance/custom-limitrange" = "true"
}
}
}
# LimitRange for the nvidia namespace, replacing the Kyverno tier-2-gpu default
# of 1Gi per container. That default was inflating the requests of the NVIDIA
# operator init containers by ~2.5Gi in total, even though those containers
# only run quick validation checks and need minimal memory.
resource "kubernetes_limit_range" "nvidia_defaults" {
  metadata {
    namespace = kubernetes_namespace.nvidia.metadata[0].name
    name      = "tier-defaults"
  }

  spec {
    limit {
      type = "Container"

      # Requests applied to containers that declare none of their own.
      default_request = {
        memory = "128Mi"
        cpu    = "50m"
      }

      # Memory limit applied to containers that declare none; no CPU limit
      # is defaulted here.
      default = {
        memory = "128Mi"
      }

      # Largest memory value a single container may declare.
      max = {
        memory = "16Gi"
      }
    }
  }
}

View file

@ -18,13 +18,13 @@ driver:
config:
name: time-slicing-config
# DCGM Exporter - reduced from 2560Mi to 1536Mi based on VPA upper bound of 1459Mi (1.05x margin)
# DCGM Exporter - reduced to 768Mi (actual usage ~489Mi, 1.5x margin)
dcgmExporter:
resources:
requests:
memory: "1536Mi"
memory: "768Mi"
limits:
memory: "1536Mi"
memory: "768Mi"
# CUDA Validator - reduced from 1024Mi to 256Mi (one-shot job)
validator: