diff --git a/stacks/dbaas/modules/dbaas/main.tf b/stacks/dbaas/modules/dbaas/main.tf
index 1c94052e..233913f2 100644
--- a/stacks/dbaas/modules/dbaas/main.tf
+++ b/stacks/dbaas/modules/dbaas/main.tf
@@ -35,8 +35,8 @@ resource "kubernetes_resource_quota" "dbaas" {
   spec {
     hard = {
       "requests.cpu"    = "8"
-      "requests.memory" = "20Gi"
-      "limits.memory"   = "20Gi"
+      "requests.memory" = "24Gi"
+      "limits.memory"   = "24Gi"
       pods              = "30"
     }
   }
diff --git a/stacks/immich/frame.tf b/stacks/immich/frame.tf
index a6fa0098..f0c06487 100644
--- a/stacks/immich/frame.tf
+++ b/stacks/immich/frame.tf
@@ -66,6 +66,15 @@ resource "kubernetes_deployment" "immich-frame" {
         container {
           image = "ghcr.io/immichframe/immichframe:latest"
           name  = "immich-frame"
+          resources {
+            requests = {
+              cpu    = "10m"
+              memory = "64Mi"
+            }
+            limits = {
+              memory = "128Mi"
+            }
+          }
           port {
             container_port = 8080
             protocol       = "TCP"
diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl
index c4d5070b..8dcda717 100755
--- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl
+++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl
@@ -161,9 +161,9 @@ server:
   resources:
     requests:
       cpu: 100m
-      memory: 4Gi
+      memory: 3Gi
     limits:
-      memory: 4Gi
+      memory: 3Gi
   livenessProbeInitialDelay: 300
   readinessProbeInitialDelay: 60
   strategy:
diff --git a/stacks/nvidia/modules/nvidia/main.tf b/stacks/nvidia/modules/nvidia/main.tf
index f23226a2..eb7cae52 100644
--- a/stacks/nvidia/modules/nvidia/main.tf
+++ b/stacks/nvidia/modules/nvidia/main.tf
@@ -12,8 +12,34 @@ resource "kubernetes_namespace" "nvidia" {
     name = "nvidia"
     labels = {
       "istio-injection" : "disabled"
-      tier                               = var.tier
-      "resource-governance/custom-quota" = "true"
+      tier                                    = var.tier
+      "resource-governance/custom-quota"      = "true"
+      "resource-governance/custom-limitrange" = "true"
+    }
+  }
+}
+
+# Custom LimitRange - overrides Kyverno tier-2-gpu default (1Gi per container)
+# which was inflating NVIDIA operator init container requests by ~2.5Gi total.
+# Init containers do quick validation checks and need minimal memory.
+resource "kubernetes_limit_range" "nvidia_defaults" {
+  metadata {
+    name      = "tier-defaults"
+    namespace = kubernetes_namespace.nvidia.metadata[0].name
+  }
+  spec {
+    limit {
+      type = "Container"
+      default = {
+        memory = "128Mi"
+      }
+      default_request = {
+        cpu    = "50m"
+        memory = "128Mi"
+      }
+      max = {
+        memory = "16Gi"
+      }
     }
   }
 }
diff --git a/stacks/nvidia/modules/nvidia/values.yaml b/stacks/nvidia/modules/nvidia/values.yaml
index 71c2eac7..03548a55 100644
--- a/stacks/nvidia/modules/nvidia/values.yaml
+++ b/stacks/nvidia/modules/nvidia/values.yaml
@@ -18,13 +18,13 @@ driver:
   config:
     name: time-slicing-config
 
-# DCGM Exporter - reduced from 2560Mi to 1536Mi based on VPA upper bound of 1459Mi (1.05x margin)
+# DCGM Exporter - reduced to 768Mi (actual usage ~489Mi, 1.5x margin)
 dcgmExporter:
   resources:
     requests:
-      memory: "1536Mi"
+      memory: "768Mi"
     limits:
-      memory: "1536Mi"
+      memory: "768Mi"
 
 # CUDA Validator - reduced from 1024Mi to 256Mi (one-shot job)
 validator:
diff --git a/stacks/onlyoffice/main.tf b/stacks/onlyoffice/main.tf
index 3643b454..86fd863b 100644
--- a/stacks/onlyoffice/main.tf
+++ b/stacks/onlyoffice/main.tf
@@ -134,10 +134,10 @@ resource "kubernetes_deployment" "onlyoffice-document-server" {
           resources {
             requests = {
               cpu    = "100m"
-              memory = "2304Mi"
+              memory = "1536Mi"
             }
             limits = {
-              memory = "2304Mi"
+              memory = "1536Mi"
             }
           }
           port {
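
Note on the `resource-governance/custom-limitrange` label: it only takes effect if the
cluster's Kyverno policy that generates tier default LimitRanges excludes namespaces
carrying it. That policy is not part of this patch, so the sketch below is a hypothetical
reconstruction using the standard Kyverno generate pattern; the policy and rule names, the
`tier: tier-2-gpu` selector, and the 1Gi defaults are inferred from the comment in
stacks/nvidia/modules/nvidia/main.tf, not confirmed against the real policy.

# Hypothetical sketch - the actual policy lives outside this repo.
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: tier-2-gpu-defaults        # assumed name
spec:
  rules:
    - name: generate-default-limitrange
      match:
        any:
          - resources:
              kinds:
                - Namespace
              selector:
                matchLabels:
                  tier: tier-2-gpu # assumed tier label value
      exclude:
        any:
          - resources:
              selector:
                matchLabels:
                  # the opt-out honored by this patch
                  resource-governance/custom-limitrange: "true"
      generate:
        apiVersion: v1
        kind: LimitRange
        name: tier-defaults
        namespace: "{{request.object.metadata.name}}"
        synchronize: true
        data:
          spec:
            limits:
              - type: Container
                default:
                  memory: 1Gi      # the per-container default the nvidia namespace opts out of
                defaultRequest:
                  memory: 1Gi

Excluding the namespace at generation time, rather than patching the generated object,
keeps ownership clean: Kyverno never creates `tier-defaults` in the nvidia namespace, so
the Terraform-managed kubernetes_limit_range is its sole writer and synchronize: true
cannot revert it.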