[ci skip] Fix Prometheus storage alert and Grafana quota exhaustion

- Enable size-based TSDB retention (45GB) to clean up old blocks
  (including 2021-era blocks with failed compaction)
- Increase monitoring namespace quota limits from 64 CPU / 128Gi memory
  to 80 CPU / 160Gi memory to allow Grafana rolling updates
This commit is contained in:
Viktor Barzin 2026-02-21 21:04:08 +00:00
parent dcce738641
commit 26ba9ea371
2 changed files with 3 additions and 3 deletions

View file

@@ -194,8 +194,8 @@ resource "kubernetes_resource_quota" "monitoring" {
hard = {
"requests.cpu" = "16"
"requests.memory" = "16Gi"
-"limits.cpu" = "64"
-"limits.memory" = "128Gi"
+"limits.cpu" = "80"
+"limits.memory" = "160Gi"
pods = "100"
}
}

View file

@@ -109,7 +109,7 @@ server:
# - "web.enable-admin-api"
- "web.enable-lifecycle"
- "storage.tsdb.allow-overlapping-blocks"
-# - "storage.tsdb.retention.size=1GB"
+- "storage.tsdb.retention.size=45GB"
- "storage.tsdb.wal-compression"
- "storage.tsdb.wal-compression"
persistentVolume:
# enabled: false