right-size memory requests to unblock GPU workloads and fix dbaas quota [ci skip]
- nvidia: custom LimitRange (128Mi default, was 1Gi from Kyverno) to stop inflating GPU operator init containers; saves ~2.5Gi on the GPU node
- nvidia: dcgm-exporter 1536Mi → 768Mi (actual usage 489Mi)
- monitoring: prometheus server 4Gi → 3Gi (actual usage 2.6Gi)
- onlyoffice: 2304Mi → 1536Mi (actual usage 1.3Gi)
- immich-frame: explicit resources (64Mi memory request, 128Mi limit; was getting the 1Gi LimitRange default)
- dbaas: quota requests.memory and limits.memory 20Gi → 24Gi to fit a 3rd MySQL replica

Root cause: the Kyverno tier-2-gpu LimitRange injected 1Gi into every NVIDIA init container that had no explicit resources, wasting ~2.5Gi of scheduling overhead on the GPU node. Combined with over-requesting elsewhere, this left frigate and immich-ml unable to schedule.
This commit is contained in:
parent
73511b1230
commit
12a51c4ffa
6 changed files with 46 additions and 11 deletions
|
|
@ -35,8 +35,8 @@ resource "kubernetes_resource_quota" "dbaas" {
|
||||||
spec {
|
spec {
|
||||||
hard = {
|
hard = {
|
||||||
"requests.cpu" = "8"
|
"requests.cpu" = "8"
|
||||||
"requests.memory" = "20Gi"
|
"requests.memory" = "24Gi"
|
||||||
"limits.memory" = "20Gi"
|
"limits.memory" = "24Gi"
|
||||||
pods = "30"
|
pods = "30"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,15 @@ resource "kubernetes_deployment" "immich-frame" {
|
||||||
container {
|
container {
|
||||||
image = "ghcr.io/immichframe/immichframe:latest"
|
image = "ghcr.io/immichframe/immichframe:latest"
|
||||||
name = "immich-frame"
|
name = "immich-frame"
|
||||||
|
resources {
|
||||||
|
requests = {
|
||||||
|
cpu = "10m"
|
||||||
|
memory = "64Mi"
|
||||||
|
}
|
||||||
|
limits = {
|
||||||
|
memory = "128Mi"
|
||||||
|
}
|
||||||
|
}
|
||||||
port {
|
port {
|
||||||
container_port = 8080
|
container_port = 8080
|
||||||
protocol = "TCP"
|
protocol = "TCP"
|
||||||
|
|
|
||||||
|
|
@ -161,9 +161,9 @@ server:
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
memory: 4Gi
|
memory: 3Gi
|
||||||
limits:
|
limits:
|
||||||
memory: 4Gi
|
memory: 3Gi
|
||||||
livenessProbeInitialDelay: 300
|
livenessProbeInitialDelay: 300
|
||||||
readinessProbeInitialDelay: 60
|
readinessProbeInitialDelay: 60
|
||||||
strategy:
|
strategy:
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,34 @@ resource "kubernetes_namespace" "nvidia" {
|
||||||
name = "nvidia"
|
name = "nvidia"
|
||||||
labels = {
|
labels = {
|
||||||
"istio-injection" : "disabled"
|
"istio-injection" : "disabled"
|
||||||
tier = var.tier
|
tier = var.tier
|
||||||
"resource-governance/custom-quota" = "true"
|
"resource-governance/custom-quota" = "true"
|
||||||
|
"resource-governance/custom-limitrange" = "true"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Custom LimitRange — overrides Kyverno tier-2-gpu default (1Gi per container)
|
||||||
|
# which was inflating NVIDIA operator init container requests by ~2.5Gi total.
|
||||||
|
# Init containers do quick validation checks and need minimal memory.
|
||||||
|
resource "kubernetes_limit_range" "nvidia_defaults" {
|
||||||
|
metadata {
|
||||||
|
name = "tier-defaults"
|
||||||
|
namespace = kubernetes_namespace.nvidia.metadata[0].name
|
||||||
|
}
|
||||||
|
spec {
|
||||||
|
limit {
|
||||||
|
type = "Container"
|
||||||
|
default = {
|
||||||
|
memory = "128Mi"
|
||||||
|
}
|
||||||
|
default_request = {
|
||||||
|
cpu = "50m"
|
||||||
|
memory = "128Mi"
|
||||||
|
}
|
||||||
|
max = {
|
||||||
|
memory = "16Gi"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,13 +18,13 @@ driver:
|
||||||
config:
|
config:
|
||||||
name: time-slicing-config
|
name: time-slicing-config
|
||||||
|
|
||||||
# DCGM Exporter - reduced from 2560Mi to 1536Mi based on VPA upper bound of 1459Mi (1.05x margin)
|
# DCGM Exporter - reduced from 1536Mi to 768Mi (actual usage ~489Mi, ~1.5x margin)
|
||||||
dcgmExporter:
|
dcgmExporter:
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "1536Mi"
|
memory: "768Mi"
|
||||||
limits:
|
limits:
|
||||||
memory: "1536Mi"
|
memory: "768Mi"
|
||||||
|
|
||||||
# CUDA Validator - reduced from 1024Mi to 256Mi (one-shot job)
|
# CUDA Validator - reduced from 1024Mi to 256Mi (one-shot job)
|
||||||
validator:
|
validator:
|
||||||
|
|
|
||||||
|
|
@ -134,10 +134,10 @@ resource "kubernetes_deployment" "onlyoffice-document-server" {
|
||||||
resources {
|
resources {
|
||||||
requests = {
|
requests = {
|
||||||
cpu = "100m"
|
cpu = "100m"
|
||||||
memory = "2304Mi"
|
memory = "1536Mi"
|
||||||
}
|
}
|
||||||
limits = {
|
limits = {
|
||||||
memory = "2304Mi"
|
memory = "1536Mi"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
port {
|
port {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue