diff --git a/stacks/claude-memory/main.tf b/stacks/claude-memory/main.tf index cacddc51..30d6e9fc 100644 --- a/stacks/claude-memory/main.tf +++ b/stacks/claude-memory/main.tf @@ -224,11 +224,11 @@ resource "kubernetes_deployment" "claude-memory" { resources { requests = { - memory = "64Mi" + memory = "128Mi" cpu = "10m" } limits = { - memory = "64Mi" + memory = "128Mi" } } } diff --git a/stacks/dawarich/main.tf b/stacks/dawarich/main.tf index 5874e6ad..8b84a75d 100644 --- a/stacks/dawarich/main.tf +++ b/stacks/dawarich/main.tf @@ -176,10 +176,10 @@ resource "kubernetes_deployment" "dawarich" { resources { requests = { cpu = "15m" - memory = "512Mi" + memory = "768Mi" } limits = { - memory = "512Mi" + memory = "768Mi" } } } diff --git a/stacks/ebook2audiobook/main.tf b/stacks/ebook2audiobook/main.tf index 1cd38f73..f012b1e4 100644 --- a/stacks/ebook2audiobook/main.tf +++ b/stacks/ebook2audiobook/main.tf @@ -326,7 +326,7 @@ resource "kubernetes_deployment" "audiblez-web" { } } spec { - replicas = 1 + replicas = 0 # Scaled down - GPU node memory pressure strategy { type = "Recreate" } diff --git a/stacks/health/main.tf b/stacks/health/main.tf index 3f108a10..0a9b30fc 100644 --- a/stacks/health/main.tf +++ b/stacks/health/main.tf @@ -107,11 +107,11 @@ resource "kubernetes_deployment" "health" { resources { requests = { - memory = "128Mi" + memory = "256Mi" cpu = "15m" } limits = { - memory = "128Mi" + memory = "256Mi" } } } diff --git a/stacks/linkwarden/main.tf b/stacks/linkwarden/main.tf index a35e79e7..c7741eb2 100644 --- a/stacks/linkwarden/main.tf +++ b/stacks/linkwarden/main.tf @@ -182,10 +182,10 @@ resource "kubernetes_deployment" "linkwarden" { resources { requests = { cpu = "50m" - memory = "768Mi" + memory = "1Gi" } limits = { - memory = "768Mi" + memory = "1Gi" } } } diff --git a/stacks/navidrome/main.tf b/stacks/navidrome/main.tf index 37f88729..1626b705 100644 --- a/stacks/navidrome/main.tf +++ b/stacks/navidrome/main.tf @@ -133,10 +133,10 @@ resource "kubernetes_deployment" "navidrome" { resources { requests = { cpu = "15m" - memory = "64Mi" + memory = "128Mi" } limits = { - memory = "64Mi" + memory = "128Mi" } } } diff --git a/stacks/ntfy/main.tf b/stacks/ntfy/main.tf index 86bdd2f4..fdd43667 100644 --- a/stacks/ntfy/main.tf +++ b/stacks/ntfy/main.tf @@ -121,10 +121,10 @@ resource "kubernetes_deployment" "ntfy" { resources { requests = { cpu = "10m" - memory = "64Mi" + memory = "128Mi" } limits = { - memory = "64Mi" + memory = "128Mi" } } } diff --git a/stacks/platform/modules/dbaas/main.tf b/stacks/platform/modules/dbaas/main.tf index 60e1bd86..1c94052e 100644 --- a/stacks/platform/modules/dbaas/main.tf +++ b/stacks/platform/modules/dbaas/main.tf @@ -35,8 +35,8 @@ resource "kubernetes_resource_quota" "dbaas" { spec { hard = { "requests.cpu" = "8" - "requests.memory" = "16Gi" - "limits.memory" = "16Gi" + "requests.memory" = "20Gi" + "limits.memory" = "20Gi" pods = "30" } } @@ -78,10 +78,10 @@ resource "helm_release" "mysql_operator" { resources = { requests = { cpu = "100m" - memory = "384Mi" + memory = "512Mi" } limits = { - memory = "384Mi" + memory = "512Mi" } } })] diff --git a/stacks/platform/modules/kyverno/main.tf b/stacks/platform/modules/kyverno/main.tf index 405029dc..bdd152ca 100644 --- a/stacks/platform/modules/kyverno/main.tf +++ b/stacks/platform/modules/kyverno/main.tf @@ -57,11 +57,11 @@ resource "helm_release" "kyverno" { cleanupController = { resources = { limits = { - memory = "128Mi" + memory = "192Mi" } requests = { cpu = "100m" - memory = "128Mi" + memory = "192Mi" } } } diff --git a/stacks/platform/modules/nvidia/main.tf b/stacks/platform/modules/nvidia/main.tf index 7297491d..f23226a2 100644 --- a/stacks/platform/modules/nvidia/main.tf +++ b/stacks/platform/modules/nvidia/main.tf @@ -132,7 +132,11 @@ resource "kubernetes_deployment" "nvidia-exporter" { } } resources { + requests = { + memory = "192Mi" + } limits = { + memory = "192Mi" "nvidia.com/gpu" = "1" } } diff --git a/stacks/platform/modules/nvidia/values.yaml b/stacks/platform/modules/nvidia/values.yaml index 44b6dcc2..71c2eac7 100644 --- a/stacks/platform/modules/nvidia/values.yaml +++ b/stacks/platform/modules/nvidia/values.yaml @@ -18,13 +18,13 @@ driver: config: name: time-slicing-config -# DCGM Exporter - increased from 768Mi to 2560Mi based on VPA upper bound of 2091Mi +# DCGM Exporter - reduced from 2560Mi to 1536Mi based on VPA upper bound of 1459Mi (1.05x margin) dcgmExporter: resources: requests: - memory: "2560Mi" + memory: "1536Mi" limits: - memory: "2560Mi" + memory: "1536Mi" # CUDA Validator - reduced from 1024Mi to 256Mi (one-shot job) validator: diff --git a/stacks/privatebin/main.tf b/stacks/privatebin/main.tf index 969acba2..7b74b310 100644 --- a/stacks/privatebin/main.tf +++ b/stacks/privatebin/main.tf @@ -70,10 +70,10 @@ resource "kubernetes_deployment" "privatebin" { resources { requests = { cpu = "10m" - memory = "64Mi" + memory = "128Mi" } limits = { - memory = "64Mi" + memory = "128Mi" } } } diff --git a/stacks/rybbit/main.tf b/stacks/rybbit/main.tf index 47a4c161..4ecf5a7e 100644 --- a/stacks/rybbit/main.tf +++ b/stacks/rybbit/main.tf @@ -353,10 +353,10 @@ resource "kubernetes_deployment" "rybbit" { resources { requests = { cpu = "25m" - memory = "768Mi" + memory = "384Mi" } limits = { - memory = "768Mi" + memory = "384Mi" } } } diff --git a/stacks/servarr/listenarr/main.tf b/stacks/servarr/listenarr/main.tf index fde9f5b6..7b6163a8 100644 --- a/stacks/servarr/listenarr/main.tf +++ b/stacks/servarr/listenarr/main.tf @@ -59,10 +59,10 @@ resource "kubernetes_deployment" "listenarr" { resources { requests = { cpu = "25m" - memory = "768Mi" + memory = "896Mi" } limits = { - memory = "768Mi" + memory = "896Mi" } } } diff --git a/stacks/stirling-pdf/main.tf b/stacks/stirling-pdf/main.tf index 16ef39a9..86fa258f 100644 --- a/stacks/stirling-pdf/main.tf +++ b/stacks/stirling-pdf/main.tf @@ -61,10 +61,10 @@ resource "kubernetes_deployment" "stirling-pdf" { resources { requests = { cpu = "25m" - memory = "1200Mi" + memory = "1536Mi" } limits = { - memory = "1200Mi" + memory = "1536Mi" } } diff --git a/stacks/whisper/main.tf b/stacks/whisper/main.tf index 36d5144e..4d98346e 100644 --- a/stacks/whisper/main.tf +++ b/stacks/whisper/main.tf @@ -38,7 +38,7 @@ resource "kubernetes_deployment" "whisper" { } } spec { - replicas = 1 + replicas = 0 # Scaled down - GPU node memory pressure strategy { type = "Recreate" } @@ -156,7 +156,7 @@ resource "kubernetes_deployment" "piper" { } } spec { - replicas = 1 + replicas = 0 # Scaled down - GPU node memory pressure strategy { type = "Recreate" }