Phase 1 - Quick wins (~4.5 Gi saved):
- democratic-csi: add explicit sidecar resources (64-80Mi vs 256Mi LimitRange default)
- caretta: 768Mi → 600Mi (VPA upper 485Mi)
- immich-ml: 4Gi → 3584Mi (VPA upper 2.95Gi, GPU margin)
- onlyoffice: 3Gi → 2304Mi (VPA upper 1.82Gi)

Phase 2 - Safety fixes (prevent OOMKills):
- frigate: 2Gi/8Gi → 5Gi/10Gi (VPA upper 7.7Gi, was 4% headroom)
- openclaw: 1280Mi req → 2Gi req=limit (documented 2Gi requirement)

Phase 3 - Additional right-sizing:
- authentik workers: 1Gi → 896Mi x3 (VPA upper 722Mi)
- shlink: 512Mi/768Mi → 960Mi req=limit (VPA upper 780Mi, safety increase)

Phase 4 - Burstable QoS for lower tiers:
- tier-3-edge: 128Mi/128Mi → 96Mi req / 192Mi limit
- tier-4-aux: 128Mi/128Mi → 64Mi req / 256Mi limit

Phase 5 - Monitoring:
- Add ClusterMemoryRequestsHigh alert (>85% allocatable, 15m)
- Add ContainerNearOOM alert (>85% limit, 30m)
- Add PodUnschedulable alert (5m, critical)

Cluster: 92.7% → 90.8% memory requests. Stirling-pdf now schedulable.
62 lines
1.3 KiB
HCL
62 lines
1.3 KiB
HCL
# Installs the groundcover "caretta" Helm chart into the monitoring namespace.
# The chart's bundled Grafana and VictoriaMetrics sub-charts are switched off.
resource "helm_release" "caretta" {
  name      = "caretta"
  namespace = kubernetes_namespace.monitoring.metadata[0].name
  # NOTE(review): the namespace is referenced from kubernetes_namespace.monitoring,
  # so this flag looks redundant — confirm before removing.
  create_namespace = true

  # Chart source, pinned to an exact version.
  repository = "https://helm.groundcover.com/"
  chart      = "caretta"
  version    = "0.0.16"

  values = [yamlencode({
    # Bundled sub-charts are disabled.
    "grafana"                 = { enabled = false }
    "victoria-metrics-single" = { enabled = false }

    # Memory request equals the memory limit; no CPU limit is set.
    resources = {
      limits = {
        memory = "600Mi"
      }
      requests = {
        cpu    = "10m"
        memory = "600Mi"
      }
    }

    # Tolerate the NoSchedule taints on control-plane and GPU nodes so the
    # workload may also be placed there.
    tolerations = [
      for taint_key in ["node-role.kubernetes.io/control-plane", "nvidia.com/gpu"] : {
        key      = taint_key
        operator = "Exists"
        effect   = "NoSchedule"
      }
    ]
  })]
}
|
|
|
|
# Service in the monitoring namespace that exposes TCP port 7117 ("metrics")
# for pods selected by the label app=caretta.
# NOTE(review): presumably this is the scrape target for caretta's metrics
# endpoint — confirm the chart labels its pods with app=caretta.
resource "kubernetes_service" "caretta_metrics" {
  metadata {
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    name      = "caretta-metrics"

    labels = { app = "caretta" }
  }

  spec {
    # Service port and container port are both 7117.
    port {
      name        = "metrics"
      protocol    = "TCP"
      port        = 7117
      target_port = 7117
    }

    # Pods backing this service.
    selector = { app = "caretta" }
  }
}
|
|
|
|
# Caretta dashboard is now loaded via the grafana_dashboards for_each in grafana.tf
|