equalize memory req=lim across 70+ containers using Prometheus 7d max data

After the node2 OOM incident, right-size memory across the cluster by setting
requests=limits based on max_over_time(container_memory_working_set_bytes[7d])
with 1.3x headroom. This eliminates the ~37Gi overcommit gap.

Categories:
- Safe equalization (50 containers): set req=lim where the 7d max is well within the target
- Limit increases (8 containers): raise limits for services spiking above their current limits
- No Prometheus data (12 containers): conservatively set lim=req
- Exception: nextcloud is not equalized (req=256Mi, lim=8Gi) because of Apache memory spikes

Also increased the dbaas namespace memory quota from 12Gi to 16Gi: mysql now
has 4Gi limits across 3 replicas (12Gi), which alone would use up the old quota.
This commit is contained in:
Viktor Barzin 2026-03-14 21:46:49 +00:00
parent eb0301b02b
commit 23019da8e5
39 changed files with 211 additions and 74 deletions

View file

@ -152,7 +152,7 @@ resource "kubernetes_deployment" "actualbudget-http-api" {
memory = "128Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "512Mi" memory = "128Mi"
} }
} }

View file

@ -33,7 +33,7 @@ variable "gemini_api_key" {
} }
variable "memory_limit" { variable "memory_limit" {
type = string type = string
default = "256Mi" default = "128Mi"
} }
variable "cpu_request" { variable "cpu_request" {
type = string type = string
@ -41,7 +41,7 @@ variable "cpu_request" {
} }
variable "memory_request" { variable "memory_request" {
type = string type = string
default = "64Mi" default = "128Mi"
} }
variable "extra_annotations" { variable "extra_annotations" {
type = map(string) type = map(string)

View file

@ -86,7 +86,7 @@ resource "kubernetes_deployment" "frigate" {
resources { resources {
requests = { requests = {
cpu = "1500m" cpu = "1500m"
memory = "2Gi" memory = "8Gi"
} }
limits = { limits = {
memory = "8Gi" memory = "8Gi"

View file

@ -104,10 +104,16 @@ collabora:
resources: resources:
limits: limits:
memory: 1Gi memory: 8Gi
requests: requests:
cpu: 50m cpu: 50m
memory: 256Mi memory: 256Mi
cronjob: cronjob:
enabled: true enabled: true
resources:
limits:
memory: 384Mi
requests:
cpu: 25m
memory: 384Mi

View file

@ -105,7 +105,7 @@ resource "kubernetes_deployment" "novelapp" {
cpu = "10m" cpu = "10m"
} }
limits = { limits = {
memory = "128Mi" memory = "64Mi"
} }
} }
} }

View file

@ -119,6 +119,7 @@ resource "kubernetes_deployment" "ollama" {
memory = "256Mi" memory = "256Mi"
} }
limits = { limits = {
memory = "256Mi"
"nvidia.com/gpu" = "1" "nvidia.com/gpu" = "1"
} }
} }

View file

@ -20,7 +20,7 @@ server:
resources: resources:
requests: requests:
cpu: 100m cpu: 100m
memory: 512Mi memory: 1Gi
limits: limits:
memory: 1Gi memory: 1Gi
topologySpreadConstraints: topologySpreadConstraints:
@ -48,7 +48,7 @@ worker:
resources: resources:
requests: requests:
cpu: 100m cpu: 100m
memory: 384Mi memory: 1Gi
limits: limits:
memory: 1Gi memory: 1Gi
topologySpreadConstraints: topologySpreadConstraints:

View file

@ -73,10 +73,10 @@ resource "kubernetes_deployment" "cloudflared" {
resources { resources {
requests = { requests = {
cpu = "15m" cpu = "15m"
memory = "32Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "128Mi"
} }
} }
} }

View file

@ -37,7 +37,7 @@ resource "helm_release" "cnpg" {
resources = { resources = {
requests = { requests = {
cpu = "100m" cpu = "100m"
memory = "128Mi" memory = "256Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "256Mi"

View file

@ -170,10 +170,10 @@ resource "kubernetes_deployment" "crowdsec-web" {
resources { resources {
requests = { requests = {
cpu = "15m" cpu = "15m"
memory = "32Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "128Mi"
} }
} }
} }

View file

@ -78,10 +78,10 @@ resource "helm_release" "mysql_operator" {
resources = { resources = {
requests = { requests = {
cpu = "100m" cpu = "100m"
memory = "256Mi" memory = "384Mi"
} }
limits = { limits = {
memory = "512Mi" memory = "384Mi"
} }
} }
})] })]
@ -181,10 +181,10 @@ resource "helm_release" "mysql_cluster" {
resources = { resources = {
requests = { requests = {
cpu = "250m" cpu = "250m"
memory = "2Gi" memory = "4Gi"
} }
limits = { limits = {
memory = "2Gi" memory = "4Gi"
} }
} }
@ -216,11 +216,11 @@ resource "helm_release" "mysql_cluster" {
name = "mysql" name = "mysql"
resources = { resources = {
requests = { requests = {
memory = "2Gi" memory = "4Gi"
cpu = "250m" cpu = "250m"
} }
limits = { limits = {
memory = "2Gi" memory = "4Gi"
} }
} }
}] }]
@ -546,10 +546,10 @@ resource "kubernetes_deployment" "phpmyadmin" {
resources { resources {
requests = { requests = {
cpu = "15m" cpu = "15m"
memory = "32Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "128Mi"
} }
} }
} }
@ -977,7 +977,7 @@ resource "kubernetes_deployment" "pgadmin" {
resources { resources {
requests = { requests = {
cpu = "25m" cpu = "25m"
memory = "128Mi" memory = "512Mi"
} }
limits = { limits = {
memory = "512Mi" memory = "512Mi"

View file

@ -79,10 +79,10 @@ resource "kubernetes_deployment" "headscale" {
resources { resources {
requests = { requests = {
cpu = "50m" cpu = "50m"
memory = "64Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "128Mi"
} }
} }
@ -163,7 +163,7 @@ resource "kubernetes_deployment" "headscale" {
resources { resources {
requests = { requests = {
cpu = "25m" cpu = "25m"
memory = "32Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "128Mi" memory = "128Mi"

View file

@ -38,8 +38,8 @@ resource "helm_release" "democratic_csi" {
replicas = 2 replicas = 2
driver = { driver = {
resources = { resources = {
requests = { cpu = "25m", memory = "64Mi" } requests = { cpu = "25m", memory = "192Mi" }
limits = { memory = "256Mi" } limits = { memory = "192Mi" }
} }
} }
} }
@ -47,8 +47,8 @@ resource "helm_release" "democratic_csi" {
node = { node = {
driver = { driver = {
resources = { resources = {
requests = { cpu = "25m", memory = "64Mi" } requests = { cpu = "25m", memory = "192Mi" }
limits = { memory = "256Mi" } limits = { memory = "192Mi" }
} }
} }

View file

@ -72,7 +72,7 @@ resource "kubernetes_deployment" "k8s_portal" {
resources { resources {
requests = { requests = {
cpu = "10m" cpu = "10m"
memory = "32Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "128Mi" memory = "128Mi"

View file

@ -30,7 +30,31 @@ resource "helm_release" "kyverno" {
reportsController = { reportsController = {
resources = { resources = {
limits = { limits = {
memory = "512Mi" memory = "128Mi"
}
requests = {
cpu = "100m"
memory = "128Mi"
}
}
}
backgroundController = {
resources = {
limits = {
memory = "384Mi"
}
requests = {
cpu = "100m"
memory = "384Mi"
}
}
}
cleanupController = {
resources = {
limits = {
memory = "128Mi"
} }
requests = { requests = {
cpu = "100m" cpu = "100m"
@ -45,11 +69,11 @@ resource "helm_release" "kyverno" {
container = { container = {
resources = { resources = {
limits = { limits = {
memory = "768Mi" memory = "256Mi"
} }
requests = { requests = {
cpu = "100m" cpu = "100m"
memory = "128Mi" memory = "256Mi"
} }
} }
} }

View file

@ -362,7 +362,7 @@ resource "kubernetes_deployment" "mailserver" {
resources { resources {
requests = { requests = {
cpu = "25m" cpu = "25m"
memory = "128Mi" memory = "512Mi"
} }
limits = { limits = {
memory = "512Mi" memory = "512Mi"
@ -391,10 +391,10 @@ resource "kubernetes_deployment" "mailserver" {
resources { resources {
requests = { requests = {
cpu = "10m" cpu = "10m"
memory = "16Mi" memory = "32Mi"
} }
limits = { limits = {
memory = "64Mi" memory = "32Mi"
} }
} }
} }

View file

@ -155,6 +155,15 @@ resource "kubernetes_deployment" "roundcubemail" {
name = "enigma" name = "enigma"
mount_path = "/var/roundcube/enigma" mount_path = "/var/roundcube/enigma"
} }
resources {
requests = {
cpu = "25m"
memory = "192Mi"
}
limits = {
memory = "192Mi"
}
}
} }
# volume { # volume {

View file

@ -1,2 +1,8 @@
args: args:
- "--kubelet-insecure-tls" - "--kubelet-insecure-tls"
resources:
requests:
cpu: 50m
memory: 200Mi
limits:
memory: 200Mi

View file

@ -29,10 +29,10 @@ resource "helm_release" "caretta" {
resources = { resources = {
requests = { requests = {
cpu = "10m" cpu = "10m"
memory = "300Mi" memory = "768Mi"
} }
limits = { limits = {
memory = "512Mi" memory = "768Mi"
} }
} }
})] })]

View file

@ -40,10 +40,10 @@ resource "kubernetes_deployment" "goflow2" {
resources { resources {
requests = { requests = {
cpu = "50m" cpu = "50m"
memory = "64Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "128Mi"
} }
} }
} }

View file

@ -5,7 +5,7 @@ adminPassword: "${grafana_admin_password}"
resources: resources:
requests: requests:
cpu: 50m cpu: 50m
memory: 128Mi memory: 512Mi
limits: limits:
memory: 512Mi memory: 512Mi
topologySpreadConstraints: topologySpreadConstraints:

View file

@ -123,8 +123,20 @@ alertmanager:
# web.external-url seems to be hardcoded, edited deployment manually # web.external-url seems to be hardcoded, edited deployment manually
# extraArgs: # extraArgs:
# web.external-url: "https://prometheus.viktorbarzin.me" # web.external-url: "https://prometheus.viktorbarzin.me"
resources:
requests:
cpu: 25m
memory: 256Mi
limits:
memory: 256Mi
prometheus-node-exporter: prometheus-node-exporter:
enabled: true enabled: true
resources:
requests:
cpu: 25m
memory: 100Mi
limits:
memory: 100Mi
server: server:
# Enable me to delete metrics # Enable me to delete metrics
extraFlags: extraFlags:

View file

@ -49,6 +49,16 @@ resource "kubernetes_deployment" "pve_exporter" {
container_port = 9221 container_port = 9221
} }
resources {
requests = {
cpu = "15m"
memory = "256Mi"
}
limits = {
memory = "256Mi"
}
}
# Mount the file into the container # Mount the file into the container
volume_mount { volume_mount {
name = "config-volume" name = "config-volume"

View file

@ -54,6 +54,17 @@ resource "kubernetes_deployment" "snmp-exporter" {
image = "prom/snmp-exporter" image = "prom/snmp-exporter"
name = "snmp-exporter" name = "snmp-exporter"
# command = ["/usr/local/bin/redfish_exporter", "--config.file", "/app/config.yml"] # command = ["/usr/local/bin/redfish_exporter", "--config.file", "/app/config.yml"]
resources {
requests = {
cpu = "10m"
memory = "256Mi"
}
limits = {
memory = "256Mi"
}
}
port { port {
container_port = 9116 container_port = 9116
} }

View file

@ -24,14 +24,42 @@ resource "helm_release" "nfs_csi_driver" {
controller = { controller = {
replicas = 2 replicas = 2
resources = { resources = {
requests = { cpu = "10m", memory = "32Mi" } csiProvisioner = {
limits = { memory = "128Mi" } requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
csiResizer = {
requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
csiSnapshotter = {
requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
nfs = {
requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
livenessProbe = {
requests = { cpu = "10m", memory = "64Mi" }
limits = { memory = "64Mi" }
}
} }
} }
node = { node = {
resources = { resources = {
requests = { cpu = "10m", memory = "32Mi" } nfs = {
limits = { memory = "128Mi" } requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
livenessProbe = {
requests = { cpu = "10m", memory = "64Mi" }
limits = { memory = "64Mi" }
}
nodeDriverRegistrar = {
requests = { cpu = "10m", memory = "64Mi" }
limits = { memory = "64Mi" }
}
} }
} }
storageClass = { storageClass = {

View file

@ -617,7 +617,7 @@ resource "kubernetes_daemonset" "gpu_pod_exporter" {
memory = "128Mi" memory = "128Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "128Mi"
"nvidia.com/gpu" = "1" "nvidia.com/gpu" = "1"
} }
} }

View file

@ -51,7 +51,7 @@ resource "helm_release" "redis" {
memory = "64Mi" memory = "64Mi"
} }
limits = { limits = {
memory = "128Mi" memory = "64Mi"
} }
} }
} }
@ -69,7 +69,7 @@ resource "helm_release" "redis" {
memory = "64Mi" memory = "64Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "64Mi"
} }
} }
} }
@ -89,7 +89,7 @@ resource "helm_release" "redis" {
memory = "64Mi" memory = "64Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "64Mi"
} }
} }
} }
@ -202,7 +202,7 @@ resource "kubernetes_deployment" "haproxy" {
memory = "16Mi" memory = "16Mi"
} }
limits = { limits = {
memory = "32Mi" memory = "16Mi"
} }
} }
liveness_probe { liveness_probe {

View file

@ -35,10 +35,10 @@ resource "helm_release" "sealed_secrets" {
resources = { resources = {
requests = { requests = {
cpu = "50m" cpu = "50m"
memory = "64Mi" memory = "192Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "192Mi"
} }
} }
})] })]

View file

@ -106,7 +106,7 @@ resource "kubernetes_deployment" "technitium_secondary" {
resources { resources {
requests = { requests = {
cpu = "25m" cpu = "25m"
memory = "128Mi" memory = "512Mi"
} }
limits = { limits = {
memory = "512Mi" memory = "512Mi"

View file

@ -166,7 +166,7 @@ resource "kubernetes_deployment" "technitium" {
resources { resources {
requests = { requests = {
cpu = "25m" cpu = "25m"
memory = "128Mi" memory = "512Mi"
} }
limits = { limits = {
memory = "512Mi" memory = "512Mi"

View file

@ -201,7 +201,10 @@ resource "helm_release" "traefik" {
resources = { resources = {
requests = { requests = {
cpu = "100m" cpu = "100m"
memory = "128Mi" memory = "384Mi"
}
limits = {
memory = "384Mi"
} }
} }
@ -391,10 +394,10 @@ resource "kubernetes_deployment" "bot_block_proxy" {
resources { resources {
requests = { requests = {
cpu = "5m" cpu = "5m"
memory = "32Mi" memory = "64Mi"
} }
limits = { limits = {
memory = "128Mi" memory = "64Mi"
} }
} }
} }
@ -579,10 +582,10 @@ resource "kubernetes_deployment" "auth_proxy" {
resources { resources {
requests = { requests = {
cpu = "5m" cpu = "5m"
memory = "32Mi" memory = "64Mi"
} }
limits = { limits = {
memory = "128Mi" memory = "64Mi"
} }
} }
} }

View file

@ -65,7 +65,7 @@ resource "kubernetes_deployment" "vaultwarden" {
resources { resources {
requests = { requests = {
cpu = "10m" cpu = "10m"
memory = "32Mi" memory = "256Mi"
} }
limits = { limits = {
memory = "256Mi" memory = "256Mi"

View file

@ -34,12 +34,39 @@ resource "helm_release" "vpa" {
values = [yamlencode({ values = [yamlencode({
recommender = { recommender = {
enabled = true enabled = true
resources = {
requests = {
cpu = "50m"
memory = "200Mi"
}
limits = {
memory = "200Mi"
}
}
} }
updater = { updater = {
enabled = true enabled = true
resources = {
requests = {
cpu = "50m"
memory = "200Mi"
}
limits = {
memory = "200Mi"
}
}
} }
admissionController = { admissionController = {
enabled = true enabled = true
resources = {
requests = {
cpu = "50m"
memory = "200Mi"
}
limits = {
memory = "200Mi"
}
}
} }
})] })]
} }

View file

@ -144,10 +144,10 @@ resource "kubernetes_deployment" "wireguard" {
resources { resources {
requests = { requests = {
cpu = "10m" cpu = "10m"
memory = "16Mi" memory = "64Mi"
} }
limits = { limits = {
memory = "128Mi" memory = "64Mi"
} }
} }
} }
@ -174,10 +174,10 @@ resource "kubernetes_deployment" "wireguard" {
resources { resources {
requests = { requests = {
cpu = "10m" cpu = "10m"
memory = "16Mi" memory = "32Mi"
} }
limits = { limits = {
memory = "64Mi" memory = "32Mi"
} }
} }
} }

View file

@ -120,10 +120,10 @@ resource "kubernetes_deployment" "xray" {
resources { resources {
requests = { requests = {
cpu = "10m" cpu = "10m"
memory = "32Mi" memory = "64Mi"
} }
limits = { limits = {
memory = "128Mi" memory = "64Mi"
} }
} }
} }

View file

@ -72,10 +72,10 @@ resource "kubernetes_deployment" "aiostreams" {
resources { resources {
requests = { requests = {
cpu = "25m" cpu = "25m"
memory = "256Mi" memory = "768Mi"
} }
limits = { limits = {
memory = "1Gi" memory = "768Mi"
} }
} }
} }

View file

@ -34,10 +34,10 @@ resource "kubernetes_deployment" "flaresolverr" {
resources { resources {
requests = { requests = {
cpu = "10m" cpu = "10m"
memory = "150Mi" memory = "512Mi"
} }
limits = { limits = {
memory = "384Mi" memory = "512Mi"
} }
} }
port { port {

View file

@ -59,10 +59,10 @@ resource "kubernetes_deployment" "listenarr" {
resources { resources {
requests = { requests = {
cpu = "25m" cpu = "25m"
memory = "256Mi" memory = "768Mi"
} }
limits = { limits = {
memory = "1Gi" memory = "768Mi"
} }
} }
} }

View file

@ -44,8 +44,8 @@ resource "helm_release" "vault" {
enabled = true enabled = true
resources = { resources = {
requests = { memory = "128Mi", cpu = "100m" } requests = { memory = "384Mi", cpu = "100m" }
limits = { memory = "512Mi" } limits = { memory = "384Mi" }
} }
# Allow scheduling on GPU node (node1) # Allow scheduling on GPU node (node1)