After the node2 OOM incident, right-size memory across the cluster by setting requests = limits, based on max_over_time(container_memory_working_set_bytes[7d]) with 1.3x headroom. This eliminates the ~37Gi overcommit gap.

Categories:
- Safe equalization (50 containers): set req=lim where the 7-day max is well within the target
- Limit increases (8 containers): raise limits for services spiking above their current limit
- No Prometheus data (12 containers): conservatively set lim=req
- Exception: nextcloud keeps req=256Mi/lim=8Gi due to Apache memory spikes

Also increased the dbaas namespace quota from 12Gi to 16Gi to accommodate the mysql 4Gi limits across 3 replicas.
90 lines
2.1 KiB
HCL
90 lines
2.1 KiB
HCL
# Tier name supplied by the caller; applied as the `tier` label on the
# nfs-csi namespace.
variable "tier" {
  type        = string
  description = "Value of the `tier` label set on the nfs-csi namespace."
}
|
|
# NFS server supplied by the caller; forwarded unchanged as the `server`
# parameter of the nfs-truenas StorageClass.
variable "nfs_server" {
  type        = string
  description = "NFS server passed as the `server` parameter of the nfs-truenas StorageClass."
}
|
|
|
|
# Namespace that the csi-driver-nfs Helm release is installed into.
# It carries a single `tier` label whose value comes from var.tier.
resource "kubernetes_namespace" "nfs_csi" {
  metadata {
    labels = { tier = var.tier }
    name   = "nfs-csi"
  }
}
|
|
|
|
# Installs the csi-driver-nfs Helm chart into the nfs-csi namespace.
#
# Resource policy visible below: every container declares a 10m CPU request,
# no CPU limit, and a memory request equal to its memory limit.
#
# NOTE(review): no chart `version` is set and the repository URL tracks the
# `master` branch — presumably the installed chart is whatever the repo serves
# at install time. Confirm, and consider pinning a version.
resource "helm_release" "nfs_csi_driver" {
  # Release identity and chart source.
  name       = "csi-driver-nfs"
  chart      = "csi-driver-nfs"
  repository = "https://raw.githubusercontent.com/kubernetes-csi/csi-driver-nfs/master/charts"

  # The namespace is declared by kubernetes_namespace.nfs_csi, so Helm is told
  # not to create it.
  namespace        = kubernetes_namespace.nfs_csi.metadata[0].name
  create_namespace = false

  # Rollout behaviour.
  atomic  = true
  timeout = 300

  values = [yamlencode({
    # The chart's own StorageClass is switched off; this module declares
    # kubernetes_storage_class.nfs_truenas separately.
    storageClass = {
      create = false
    }

    controller = {
      replicas = 2

      resources = {
        csiProvisioner = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        csiResizer = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        csiSnapshotter = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        nfs = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        livenessProbe = {
          requests = {
            cpu    = "10m"
            memory = "64Mi"
          }
          limits = {
            memory = "64Mi"
          }
        }
      }
    }

    node = {
      resources = {
        nfs = {
          requests = {
            cpu    = "10m"
            memory = "128Mi"
          }
          limits = {
            memory = "128Mi"
          }
        }

        livenessProbe = {
          requests = {
            cpu    = "10m"
            memory = "64Mi"
          }
          limits = {
            memory = "64Mi"
          }
        }

        nodeDriverRegistrar = {
          requests = {
            cpu    = "10m"
            memory = "64Mi"
          }
          limits = {
            memory = "64Mi"
          }
        }
      }
    }
  })]
}
|
|
|
|
# StorageClass "nfs-truenas": provisioned by nfs.csi.k8s.io against the
# /mnt/main share on var.nfs_server, with reclaim policy "Retain" and
# volume binding mode "Immediate".
resource "kubernetes_storage_class" "nfs_truenas" {
  metadata {
    name = "nfs-truenas"
  }

  storage_provisioner = "nfs.csi.k8s.io"
  volume_binding_mode = "Immediate"
  reclaim_policy      = "Retain"

  # Export location handed to the provisioner.
  parameters = {
    share  = "/mnt/main"
    server = var.nfs_server
  }

  # NOTE(review): per nfs(5), `soft` lets NFS requests fail after the
  # configured retries instead of blocking indefinitely — confirm that
  # trade-off is acceptable for the workloads using this class.
  mount_options = ["soft", "timeo=30", "retrans=3", "actimeo=5"]
}
|