equalize memory req=lim across 70+ containers using Prometheus 7d max data

After node2 OOM incident, right-size memory across the cluster by setting
requests=limits based on max_over_time(container_memory_working_set_bytes[7d])
with 1.3x headroom. Eliminates the ~37Gi overcommit gap.

Categories:
- Safe equalization (50 containers): set req=lim where max7d is well within the target
- Limit increases (8 containers): raise limits for services spiking above their current limits
- No Prometheus data (12 containers): conservatively set lim=req
- Exception: nextcloud keeps req=256Mi/lim=8Gi due to Apache memory spikes

Also increased dbaas namespace quota from 12Gi to 16Gi to accommodate mysql
4Gi limits across 3 replicas.
This commit is contained in:
Viktor Barzin 2026-03-14 21:46:49 +00:00
parent eb0301b02b
commit 23019da8e5
39 changed files with 211 additions and 74 deletions

View file

@ -152,7 +152,7 @@ resource "kubernetes_deployment" "actualbudget-http-api" {
memory = "128Mi"
}
limits = {
memory = "512Mi"
memory = "128Mi"
}
}

View file

@ -33,7 +33,7 @@ variable "gemini_api_key" {
}
variable "memory_limit" {
type = string
default = "256Mi"
default = "128Mi"
}
variable "cpu_request" {
type = string
@ -41,7 +41,7 @@ variable "cpu_request" {
}
variable "memory_request" {
type = string
default = "64Mi"
default = "128Mi"
}
variable "extra_annotations" {
type = map(string)

View file

@ -86,7 +86,7 @@ resource "kubernetes_deployment" "frigate" {
resources {
requests = {
cpu = "1500m"
memory = "2Gi"
memory = "8Gi"
}
limits = {
memory = "8Gi"

View file

@ -104,10 +104,16 @@ collabora:
resources:
limits:
memory: 1Gi
memory: 8Gi
requests:
cpu: 50m
memory: 256Mi
cronjob:
enabled: true
resources:
limits:
memory: 384Mi
requests:
cpu: 25m
memory: 384Mi

View file

@ -105,7 +105,7 @@ resource "kubernetes_deployment" "novelapp" {
cpu = "10m"
}
limits = {
memory = "128Mi"
memory = "64Mi"
}
}
}

View file

@ -119,6 +119,7 @@ resource "kubernetes_deployment" "ollama" {
memory = "256Mi"
}
limits = {
memory = "256Mi"
"nvidia.com/gpu" = "1"
}
}

View file

@ -20,7 +20,7 @@ server:
resources:
requests:
cpu: 100m
memory: 512Mi
memory: 1Gi
limits:
memory: 1Gi
topologySpreadConstraints:
@ -48,7 +48,7 @@ worker:
resources:
requests:
cpu: 100m
memory: 384Mi
memory: 1Gi
limits:
memory: 1Gi
topologySpreadConstraints:

View file

@ -73,10 +73,10 @@ resource "kubernetes_deployment" "cloudflared" {
resources {
requests = {
cpu = "15m"
memory = "32Mi"
memory = "128Mi"
}
limits = {
memory = "256Mi"
memory = "128Mi"
}
}
}

View file

@ -37,7 +37,7 @@ resource "helm_release" "cnpg" {
resources = {
requests = {
cpu = "100m"
memory = "128Mi"
memory = "256Mi"
}
limits = {
memory = "256Mi"

View file

@ -170,10 +170,10 @@ resource "kubernetes_deployment" "crowdsec-web" {
resources {
requests = {
cpu = "15m"
memory = "32Mi"
memory = "128Mi"
}
limits = {
memory = "256Mi"
memory = "128Mi"
}
}
}

View file

@ -78,10 +78,10 @@ resource "helm_release" "mysql_operator" {
resources = {
requests = {
cpu = "100m"
memory = "256Mi"
memory = "384Mi"
}
limits = {
memory = "512Mi"
memory = "384Mi"
}
}
})]
@ -181,10 +181,10 @@ resource "helm_release" "mysql_cluster" {
resources = {
requests = {
cpu = "250m"
memory = "2Gi"
memory = "4Gi"
}
limits = {
memory = "2Gi"
memory = "4Gi"
}
}
@ -216,11 +216,11 @@ resource "helm_release" "mysql_cluster" {
name = "mysql"
resources = {
requests = {
memory = "2Gi"
memory = "4Gi"
cpu = "250m"
}
limits = {
memory = "2Gi"
memory = "4Gi"
}
}
}]
@ -546,10 +546,10 @@ resource "kubernetes_deployment" "phpmyadmin" {
resources {
requests = {
cpu = "15m"
memory = "32Mi"
memory = "128Mi"
}
limits = {
memory = "256Mi"
memory = "128Mi"
}
}
}
@ -977,7 +977,7 @@ resource "kubernetes_deployment" "pgadmin" {
resources {
requests = {
cpu = "25m"
memory = "128Mi"
memory = "512Mi"
}
limits = {
memory = "512Mi"

View file

@ -79,10 +79,10 @@ resource "kubernetes_deployment" "headscale" {
resources {
requests = {
cpu = "50m"
memory = "64Mi"
memory = "128Mi"
}
limits = {
memory = "256Mi"
memory = "128Mi"
}
}
@ -163,7 +163,7 @@ resource "kubernetes_deployment" "headscale" {
resources {
requests = {
cpu = "25m"
memory = "32Mi"
memory = "128Mi"
}
limits = {
memory = "128Mi"

View file

@ -38,8 +38,8 @@ resource "helm_release" "democratic_csi" {
replicas = 2
driver = {
resources = {
requests = { cpu = "25m", memory = "64Mi" }
limits = { memory = "256Mi" }
requests = { cpu = "25m", memory = "192Mi" }
limits = { memory = "192Mi" }
}
}
}
@ -47,8 +47,8 @@ resource "helm_release" "democratic_csi" {
node = {
driver = {
resources = {
requests = { cpu = "25m", memory = "64Mi" }
limits = { memory = "256Mi" }
requests = { cpu = "25m", memory = "192Mi" }
limits = { memory = "192Mi" }
}
}

View file

@ -72,7 +72,7 @@ resource "kubernetes_deployment" "k8s_portal" {
resources {
requests = {
cpu = "10m"
memory = "32Mi"
memory = "128Mi"
}
limits = {
memory = "128Mi"

View file

@ -30,7 +30,31 @@ resource "helm_release" "kyverno" {
reportsController = {
resources = {
limits = {
memory = "512Mi"
memory = "128Mi"
}
requests = {
cpu = "100m"
memory = "128Mi"
}
}
}
backgroundController = {
resources = {
limits = {
memory = "384Mi"
}
requests = {
cpu = "100m"
memory = "384Mi"
}
}
}
cleanupController = {
resources = {
limits = {
memory = "128Mi"
}
requests = {
cpu = "100m"
@ -45,11 +69,11 @@ resource "helm_release" "kyverno" {
container = {
resources = {
limits = {
memory = "768Mi"
memory = "256Mi"
}
requests = {
cpu = "100m"
memory = "128Mi"
memory = "256Mi"
}
}
}

View file

@ -362,7 +362,7 @@ resource "kubernetes_deployment" "mailserver" {
resources {
requests = {
cpu = "25m"
memory = "128Mi"
memory = "512Mi"
}
limits = {
memory = "512Mi"
@ -391,10 +391,10 @@ resource "kubernetes_deployment" "mailserver" {
resources {
requests = {
cpu = "10m"
memory = "16Mi"
memory = "32Mi"
}
limits = {
memory = "64Mi"
memory = "32Mi"
}
}
}

View file

@ -155,6 +155,15 @@ resource "kubernetes_deployment" "roundcubemail" {
name = "enigma"
mount_path = "/var/roundcube/enigma"
}
resources {
requests = {
cpu = "25m"
memory = "192Mi"
}
limits = {
memory = "192Mi"
}
}
}
# volume {

View file

@ -1,2 +1,8 @@
args:
- "--kubelet-insecure-tls"
resources:
requests:
cpu: 50m
memory: 200Mi
limits:
memory: 200Mi

View file

@ -29,10 +29,10 @@ resource "helm_release" "caretta" {
resources = {
requests = {
cpu = "10m"
memory = "300Mi"
memory = "768Mi"
}
limits = {
memory = "512Mi"
memory = "768Mi"
}
}
})]

View file

@ -40,10 +40,10 @@ resource "kubernetes_deployment" "goflow2" {
resources {
requests = {
cpu = "50m"
memory = "64Mi"
memory = "128Mi"
}
limits = {
memory = "256Mi"
memory = "128Mi"
}
}
}

View file

@ -5,7 +5,7 @@ adminPassword: "${grafana_admin_password}"
resources:
requests:
cpu: 50m
memory: 128Mi
memory: 512Mi
limits:
memory: 512Mi
topologySpreadConstraints:

View file

@ -123,8 +123,20 @@ alertmanager:
# web.external-url seems to be hardcoded, edited deployment manually
# extraArgs:
# web.external-url: "https://prometheus.viktorbarzin.me"
resources:
requests:
cpu: 25m
memory: 256Mi
limits:
memory: 256Mi
prometheus-node-exporter:
enabled: true
resources:
requests:
cpu: 25m
memory: 100Mi
limits:
memory: 100Mi
server:
# Enable me to delete metrics
extraFlags:

View file

@ -49,6 +49,16 @@ resource "kubernetes_deployment" "pve_exporter" {
container_port = 9221
}
resources {
requests = {
cpu = "15m"
memory = "256Mi"
}
limits = {
memory = "256Mi"
}
}
# Mount the file into the container
volume_mount {
name = "config-volume"

View file

@ -54,6 +54,17 @@ resource "kubernetes_deployment" "snmp-exporter" {
image = "prom/snmp-exporter"
name = "snmp-exporter"
# command = ["/usr/local/bin/redfish_exporter", "--config.file", "/app/config.yml"]
resources {
requests = {
cpu = "10m"
memory = "256Mi"
}
limits = {
memory = "256Mi"
}
}
port {
container_port = 9116
}

View file

@ -24,14 +24,42 @@ resource "helm_release" "nfs_csi_driver" {
controller = {
replicas = 2
resources = {
requests = { cpu = "10m", memory = "32Mi" }
limits = { memory = "128Mi" }
csiProvisioner = {
requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
csiResizer = {
requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
csiSnapshotter = {
requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
nfs = {
requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
livenessProbe = {
requests = { cpu = "10m", memory = "64Mi" }
limits = { memory = "64Mi" }
}
}
}
node = {
resources = {
requests = { cpu = "10m", memory = "32Mi" }
limits = { memory = "128Mi" }
nfs = {
requests = { cpu = "10m", memory = "128Mi" }
limits = { memory = "128Mi" }
}
livenessProbe = {
requests = { cpu = "10m", memory = "64Mi" }
limits = { memory = "64Mi" }
}
nodeDriverRegistrar = {
requests = { cpu = "10m", memory = "64Mi" }
limits = { memory = "64Mi" }
}
}
}
storageClass = {

View file

@ -617,7 +617,7 @@ resource "kubernetes_daemonset" "gpu_pod_exporter" {
memory = "128Mi"
}
limits = {
memory = "256Mi"
memory = "128Mi"
"nvidia.com/gpu" = "1"
}
}

View file

@ -51,7 +51,7 @@ resource "helm_release" "redis" {
memory = "64Mi"
}
limits = {
memory = "128Mi"
memory = "64Mi"
}
}
}
@ -69,7 +69,7 @@ resource "helm_release" "redis" {
memory = "64Mi"
}
limits = {
memory = "256Mi"
memory = "64Mi"
}
}
}
@ -89,7 +89,7 @@ resource "helm_release" "redis" {
memory = "64Mi"
}
limits = {
memory = "256Mi"
memory = "64Mi"
}
}
}
@ -202,7 +202,7 @@ resource "kubernetes_deployment" "haproxy" {
memory = "16Mi"
}
limits = {
memory = "32Mi"
memory = "16Mi"
}
}
liveness_probe {

View file

@ -35,10 +35,10 @@ resource "helm_release" "sealed_secrets" {
resources = {
requests = {
cpu = "50m"
memory = "64Mi"
memory = "192Mi"
}
limits = {
memory = "256Mi"
memory = "192Mi"
}
}
})]

View file

@ -106,7 +106,7 @@ resource "kubernetes_deployment" "technitium_secondary" {
resources {
requests = {
cpu = "25m"
memory = "128Mi"
memory = "512Mi"
}
limits = {
memory = "512Mi"

View file

@ -166,7 +166,7 @@ resource "kubernetes_deployment" "technitium" {
resources {
requests = {
cpu = "25m"
memory = "128Mi"
memory = "512Mi"
}
limits = {
memory = "512Mi"

View file

@ -201,7 +201,10 @@ resource "helm_release" "traefik" {
resources = {
requests = {
cpu = "100m"
memory = "128Mi"
memory = "384Mi"
}
limits = {
memory = "384Mi"
}
}
@ -391,10 +394,10 @@ resource "kubernetes_deployment" "bot_block_proxy" {
resources {
requests = {
cpu = "5m"
memory = "32Mi"
memory = "64Mi"
}
limits = {
memory = "128Mi"
memory = "64Mi"
}
}
}
@ -579,10 +582,10 @@ resource "kubernetes_deployment" "auth_proxy" {
resources {
requests = {
cpu = "5m"
memory = "32Mi"
memory = "64Mi"
}
limits = {
memory = "128Mi"
memory = "64Mi"
}
}
}

View file

@ -65,7 +65,7 @@ resource "kubernetes_deployment" "vaultwarden" {
resources {
requests = {
cpu = "10m"
memory = "32Mi"
memory = "256Mi"
}
limits = {
memory = "256Mi"

View file

@ -34,12 +34,39 @@ resource "helm_release" "vpa" {
values = [yamlencode({
recommender = {
enabled = true
resources = {
requests = {
cpu = "50m"
memory = "200Mi"
}
limits = {
memory = "200Mi"
}
}
}
updater = {
enabled = true
resources = {
requests = {
cpu = "50m"
memory = "200Mi"
}
limits = {
memory = "200Mi"
}
}
}
admissionController = {
enabled = true
resources = {
requests = {
cpu = "50m"
memory = "200Mi"
}
limits = {
memory = "200Mi"
}
}
}
})]
}

View file

@ -144,10 +144,10 @@ resource "kubernetes_deployment" "wireguard" {
resources {
requests = {
cpu = "10m"
memory = "16Mi"
memory = "64Mi"
}
limits = {
memory = "128Mi"
memory = "64Mi"
}
}
}
@ -174,10 +174,10 @@ resource "kubernetes_deployment" "wireguard" {
resources {
requests = {
cpu = "10m"
memory = "16Mi"
memory = "32Mi"
}
limits = {
memory = "64Mi"
memory = "32Mi"
}
}
}

View file

@ -120,10 +120,10 @@ resource "kubernetes_deployment" "xray" {
resources {
requests = {
cpu = "10m"
memory = "32Mi"
memory = "64Mi"
}
limits = {
memory = "128Mi"
memory = "64Mi"
}
}
}

View file

@ -72,10 +72,10 @@ resource "kubernetes_deployment" "aiostreams" {
resources {
requests = {
cpu = "25m"
memory = "256Mi"
memory = "768Mi"
}
limits = {
memory = "1Gi"
memory = "768Mi"
}
}
}

View file

@ -34,10 +34,10 @@ resource "kubernetes_deployment" "flaresolverr" {
resources {
requests = {
cpu = "10m"
memory = "150Mi"
memory = "512Mi"
}
limits = {
memory = "384Mi"
memory = "512Mi"
}
}
port {

View file

@ -59,10 +59,10 @@ resource "kubernetes_deployment" "listenarr" {
resources {
requests = {
cpu = "25m"
memory = "256Mi"
memory = "768Mi"
}
limits = {
memory = "1Gi"
memory = "768Mi"
}
}
}

View file

@ -44,8 +44,8 @@ resource "helm_release" "vault" {
enabled = true
resources = {
requests = { memory = "128Mi", cpu = "100m" }
limits = { memory = "512Mi" }
requests = { memory = "384Mi", cpu = "100m" }
limits = { memory = "384Mi" }
}
# Allow scheduling on GPU node (node1)