After the node2 OOM incident, right-size memory across the cluster by setting requests=limits based on max_over_time(container_memory_working_set_bytes[7d]) with 1.3x headroom. This eliminates the ~37Gi overcommit gap.

Categories:
- Safe equalization (50 containers): set req=lim where max7d is well within the target
- Limit increases (8 containers): raise limits for services spiking above their current limits
- No Prometheus data (12 containers): conservatively set lim=req
- Exception: nextcloud keeps req=256Mi/lim=8Gi due to Apache memory spikes

Also increased the dbaas namespace quota from 12Gi to 16Gi to accommodate mysql 4Gi limits across 3 replicas.
53 lines
1.4 KiB
HCL
53 lines
1.4 KiB
HCL
# Tier identifier supplied by the caller; used below as the value of the
# `tier` label on the cnpg-system namespace.
variable "tier" {
  type        = string
  description = "Value of the `tier` label applied to the cnpg-system namespace."
}
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Namespace
|
|
# -----------------------------------------------------------------------------
|
|
# Namespace that hosts the CloudNativePG operator release. It carries the
# caller-supplied tier label (var.tier).
resource "kubernetes_namespace" "cnpg_system" {
  metadata {
    name   = "cnpg-system"
    labels = { tier = var.tier }
  }
}
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# CloudNativePG Operator — manages PostgreSQL clusters via CRDs
|
|
# https://cloudnative-pg.io/
|
|
# -----------------------------------------------------------------------------
|
|
resource "helm_release" "cnpg" {
  # Chart source, pinned to an exact chart version.
  repository = "https://cloudnative-pg.github.io/charts"
  chart      = "cloudnative-pg"
  version    = "0.27.1"

  # Release identity. The namespace is managed by
  # kubernetes_namespace.cnpg_system, so Helm is told not to create it.
  name             = "cnpg"
  namespace        = kubernetes_namespace.cnpg_system.metadata[0].name
  create_namespace = false

  # Failure handling: atomic install/upgrade (see the Helm provider docs for
  # exact rollback semantics); timeout is expressed in seconds.
  atomic  = true
  timeout = 300

  # Chart values. yamlencode emits object keys in lexical order, so the
  # ordering of keys here is purely for readability.
  values = [yamlencode({
    replicaCount = 1

    crds = { create = true }

    # Memory request equals the memory limit; no CPU limit is set.
    resources = {
      limits = { memory = "256Mi" }
      requests = {
        cpu    = "100m"
        memory = "256Mi"
      }
    }
  })]
}
|
|
|
|
# NOTE: local-path-provisioner is already installed in the cluster
|
|
# (via cloud-init template) with StorageClass "local-path" (default).
|
|
# ReclaimPolicy is "Delete" — for CNPG clusters, set
|
|
# .spec.storage.pvcTemplate.storageClassName = "local-path" in the
|
|
# Cluster CR. CNPG handles PVC lifecycle independently.
|