fix nvidia quota: use custom quota (32 CPU) instead of Kyverno-generated (16 CPU)

The GPU operator runs 16 pods (NFD, device plugin, driver, validators,
exporters) whose CPU limits add up to ~19 CPU. The Kyverno auto-generated
quota of 16 CPU was insufficient, blocking NFD worker and GC pods from scheduling.

- Add custom-quota label to nvidia namespace to exempt from Kyverno generation
- Add explicit ResourceQuota with limits.cpu=32, limits.memory=48Gi
- Fix: the nvidia namespace tier label was missing after a CI re-apply, causing
  Kyverno to use the fallback LimitRange instead of the tier-2-gpu-specific one

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-03-12 07:04:34 +00:00
parent 8c920bd496
commit 457d29dd3d

View file

@ -13,6 +13,23 @@ resource "kubernetes_namespace" "nvidia" {
labels = {
"istio-injection" : "disabled"
tier = var.tier
"resource-governance/custom-quota" = "true"
}
}
}
# Explicitly managed ResourceQuota for the nvidia namespace.
#
# The namespace carries the "resource-governance/custom-quota" = "true" label,
# so this quota is declared by hand here rather than left to policy generation.
# NOTE(review): the name "tier-quota" presumably mirrors the name the
# auto-generated quota would have used — confirm before renaming.
resource "kubernetes_resource_quota" "nvidia_quota" {
  metadata {
    # Reference the namespace resource so Terraform orders creation correctly.
    namespace = kubernetes_namespace.nvidia.metadata[0].name
    name      = "tier-quota"
  }

  spec {
    hard = {
      # Ceiling on the number of pods in the namespace.
      "pods" = "40"

      # Aggregate resource requests across all pods in the namespace.
      "requests.cpu"    = "8"
      "requests.memory" = "8Gi"

      # Aggregate resource limits across all pods in the namespace.
      "limits.cpu"    = "32"
      "limits.memory" = "48Gi"
    }
  }
}