From 26ba9ea3710d07b97cd4f911d0878dbf6dfdac04 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 21 Feb 2026 21:04:08 +0000 Subject: [PATCH] [ci skip] Fix Prometheus storage alert and Grafana quota exhaustion - Enable size-based TSDB retention (45GB) to clean up old blocks (including 2021-era blocks with failed compaction) - Increase the monitoring namespace quota's CPU/memory limits from 64 CPU/128Gi to 80 CPU/160Gi to allow Grafana rolling updates --- modules/kubernetes/monitoring/main.tf | 4 ++-- modules/kubernetes/monitoring/prometheus_chart_values.tpl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/kubernetes/monitoring/main.tf b/modules/kubernetes/monitoring/main.tf index baf05064..b9310b32 100644 --- a/modules/kubernetes/monitoring/main.tf +++ b/modules/kubernetes/monitoring/main.tf @@ -194,8 +194,8 @@ resource "kubernetes_resource_quota" "monitoring" { hard = { "requests.cpu" = "16" "requests.memory" = "16Gi" - "limits.cpu" = "64" - "limits.memory" = "128Gi" + "limits.cpu" = "80" + "limits.memory" = "160Gi" pods = "100" } } diff --git a/modules/kubernetes/monitoring/prometheus_chart_values.tpl b/modules/kubernetes/monitoring/prometheus_chart_values.tpl index ea9245f3..0cd47b85 100755 --- a/modules/kubernetes/monitoring/prometheus_chart_values.tpl +++ b/modules/kubernetes/monitoring/prometheus_chart_values.tpl @@ -109,7 +109,7 @@ server: # - "web.enable-admin-api" - "web.enable-lifecycle" - "storage.tsdb.allow-overlapping-blocks" - # - "storage.tsdb.retention.size=1GB" + - "storage.tsdb.retention.size=45GB" - "storage.tsdb.wal-compression" persistentVolume: # enabled: false