Equalize memory requests and limits (req=lim) across 70+ containers using Prometheus 7-day max usage data
After the node2 OOM incident, right-size memory across the cluster by setting requests equal to limits, based on max_over_time(container_memory_working_set_bytes[7d]) with 1.3x headroom. This eliminates the ~37Gi overcommit gap.

Categories:
- Safe equalization (50 containers): set req=lim where the 7-day max is well within the target.
- Limit increases (8 containers): raise limits for services spiking above their current limits.
- No Prometheus data (12 containers): conservatively set lim=req.
- Exception: nextcloud keeps req=256Mi/lim=8Gi due to Apache memory spikes.

Also increased the dbaas namespace quota from 12Gi to 16Gi to accommodate the mysql 4Gi limits across 3 replicas.
This commit is contained in:
parent
eb0301b02b
commit
23019da8e5
39 changed files with 211 additions and 74 deletions
|
|
@ -29,10 +29,10 @@ resource "helm_release" "caretta" {
|
|||
resources = {
|
||||
requests = {
|
||||
cpu = "10m"
|
||||
memory = "300Mi"
|
||||
memory = "768Mi"
|
||||
}
|
||||
limits = {
|
||||
memory = "512Mi"
|
||||
memory = "768Mi"
|
||||
}
|
||||
}
|
||||
})]
|
||||
|
|
|
|||
|
|
@ -40,10 +40,10 @@ resource "kubernetes_deployment" "goflow2" {
|
|||
resources {
|
||||
requests = {
|
||||
cpu = "50m"
|
||||
memory = "64Mi"
|
||||
memory = "128Mi"
|
||||
}
|
||||
limits = {
|
||||
memory = "256Mi"
|
||||
memory = "128Mi"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ adminPassword: "${grafana_admin_password}"
|
|||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
memory: 512Mi
|
||||
limits:
|
||||
memory: 512Mi
|
||||
topologySpreadConstraints:
|
||||
|
|
|
|||
|
|
@ -123,8 +123,20 @@ alertmanager:
|
|||
# web.external-url seems to be hardcoded, edited deployment manually
|
||||
# extraArgs:
|
||||
# web.external-url: "https://prometheus.viktorbarzin.me"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
memory: 256Mi
|
||||
prometheus-node-exporter:
|
||||
enabled: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 100Mi
|
||||
limits:
|
||||
memory: 100Mi
|
||||
server:
|
||||
# Enable me to delete metrics
|
||||
extraFlags:
|
||||
|
|
|
|||
|
|
@ -49,6 +49,16 @@ resource "kubernetes_deployment" "pve_exporter" {
|
|||
container_port = 9221
|
||||
}
|
||||
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "15m"
|
||||
memory = "256Mi"
|
||||
}
|
||||
limits = {
|
||||
memory = "256Mi"
|
||||
}
|
||||
}
|
||||
|
||||
# Mount the file into the container
|
||||
volume_mount {
|
||||
name = "config-volume"
|
||||
|
|
|
|||
|
|
@ -54,6 +54,17 @@ resource "kubernetes_deployment" "snmp-exporter" {
|
|||
image = "prom/snmp-exporter"
|
||||
name = "snmp-exporter"
|
||||
# command = ["/usr/local/bin/redfish_exporter", "--config.file", "/app/config.yml"]
|
||||
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "10m"
|
||||
memory = "256Mi"
|
||||
}
|
||||
limits = {
|
||||
memory = "256Mi"
|
||||
}
|
||||
}
|
||||
|
||||
port {
|
||||
container_port = 9116
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue