From ce7b8c2b2e6b8e2a9fbc78bb4e49f98cf73a76c0 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 3 Apr 2026 23:30:00 +0300 Subject: [PATCH] add pvc-autoresizer for automatic PVC expansion before volumes fill up [ci skip] Deploy topolvm/pvc-autoresizer controller that monitors kubelet_volume_stats via Prometheus and auto-expands annotated PVCs. Annotated all 9 block-storage PVCs (proxmox-lvm) with per-PVC thresholds and max limits. Note that resize.topolvm.io/threshold is the free-space threshold, so a value of 20% triggers expansion at 80% used. Updated PVFillingUp alert to critical/10m (means auto-expansion failed) and added PVAutoExpanding info alert at 80%. --- stacks/dbaas/modules/dbaas/main.tf | 12 ++++++ stacks/ebooks/main.tf | 5 +++ .../modules/monitoring/prometheus.tf | 5 +++ .../monitoring/prometheus_chart_values.tpl | 13 +++++-- stacks/nextcloud/main.tf | 5 +++ stacks/novelapp/main.tf | 5 +++ stacks/plotting-book/main.tf | 5 +++ stacks/pvc-autoresizer/main.tf | 4 ++ .../modules/pvc-autoresizer/main.tf | 39 +++++++++++++++++++ stacks/pvc-autoresizer/terragrunt.hcl | 8 ++++ stacks/redis/modules/redis/main.tf | 10 +++++ .../vaultwarden/modules/vaultwarden/main.tf | 5 +++ 12 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 stacks/pvc-autoresizer/main.tf create mode 100644 stacks/pvc-autoresizer/modules/pvc-autoresizer/main.tf create mode 100644 stacks/pvc-autoresizer/terragrunt.hcl diff --git a/stacks/dbaas/modules/dbaas/main.tf b/stacks/dbaas/modules/dbaas/main.tf index 61fb21ac..a283dccc 100644 --- a/stacks/dbaas/modules/dbaas/main.tf +++ b/stacks/dbaas/modules/dbaas/main.tf @@ -157,6 +157,13 @@ resource "helm_release" "mysql_cluster" { datadirVolumeClaimTemplate = { storageClassName = "proxmox-lvm" + metadata = { + annotations = { + "resize.topolvm.io/threshold" = "20%" + "resize.topolvm.io/increase" = "20%" + "resize.topolvm.io/storage_limit" = "100Gi" + } + } resources = { requests = { storage = "30Gi" @@ -897,6 +904,11 @@ resource "null_resource" "pg_cluster" { search_path: '"$user", public' enableAlterSystem: true enableSuperuserAccess: true + 
inheritedMetadata: + annotations: + resize.topolvm.io/threshold: "20%" + resize.topolvm.io/increase: "20%" + resize.topolvm.io/storage_limit: "100Gi" storage: size: 20Gi storageClass: proxmox-lvm diff --git a/stacks/ebooks/main.tf b/stacks/ebooks/main.tf index b37ff8cb..c8dd006f 100644 --- a/stacks/ebooks/main.tf +++ b/stacks/ebooks/main.tf @@ -145,6 +145,11 @@ resource "kubernetes_persistent_volume_claim" "calibre_config_iscsi" { metadata { name = "ebooks-calibre-config-proxmox" namespace = kubernetes_namespace.ebooks.metadata[0].name + annotations = { + "resize.topolvm.io/threshold" = "20%" + "resize.topolvm.io/increase" = "50%" + "resize.topolvm.io/storage_limit" = "10Gi" + } } spec { access_modes = ["ReadWriteOnce"] diff --git a/stacks/monitoring/modules/monitoring/prometheus.tf b/stacks/monitoring/modules/monitoring/prometheus.tf index 7b12ff31..8c28ac8e 100644 --- a/stacks/monitoring/modules/monitoring/prometheus.tf +++ b/stacks/monitoring/modules/monitoring/prometheus.tf @@ -4,6 +4,11 @@ resource "kubernetes_persistent_volume_claim" "prometheus_server_pvc" { metadata { name = "prometheus-data-proxmox" namespace = kubernetes_namespace.monitoring.metadata[0].name + annotations = { + "resize.topolvm.io/threshold" = "10%" + "resize.topolvm.io/increase" = "10%" + "resize.topolvm.io/storage_limit" = "500Gi" + } } spec { diff --git a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl index a014be95..9c6d49ef 100755 --- a/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl +++ b/stacks/monitoring/modules/monitoring/prometheus_chart_values.tpl @@ -801,13 +801,20 @@ serverFiles: severity: warning annotations: summary: "Disk {{ $labels.mountpoint }} on {{ $labels.instance }}: {{ $value | printf \"%.1f\" }}% free (threshold: 10%)" + - alert: PVAutoExpanding + expr: (kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes) * 100 > 80 and 
kubelet_volume_stats_capacity_bytes < 1099511627776 + for: 5m + labels: + severity: info + annotations: + summary: "PV {{ $labels.persistentvolumeclaim }} in {{ $labels.namespace }}: {{ $value | printf \"%.0f\" }}% used — auto-expansion should trigger" - alert: PVFillingUp expr: (kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes) * 100 > 95 and kubelet_volume_stats_capacity_bytes < 1099511627776 - for: 30m + for: 10m labels: - severity: warning + severity: critical annotations: - summary: "PV {{ $labels.persistentvolumeclaim }} in {{ $labels.namespace }}: {{ $value | printf \"%.0f\" }}% used (threshold: 95%)" + summary: "PV {{ $labels.persistentvolumeclaim }} in {{ $labels.namespace }}: {{ $value | printf \"%.0f\" }}% used — auto-expansion may have failed" - alert: PVPredictedFull expr: predict_linear(kubelet_volume_stats_used_bytes[6h], 3600*24) > kubelet_volume_stats_capacity_bytes for: 1h diff --git a/stacks/nextcloud/main.tf b/stacks/nextcloud/main.tf index 4349865d..6cd0eb8c 100644 --- a/stacks/nextcloud/main.tf +++ b/stacks/nextcloud/main.tf @@ -188,6 +188,11 @@ resource "kubernetes_persistent_volume_claim" "nextcloud_data_iscsi" { metadata { name = "nextcloud-data-proxmox" namespace = kubernetes_namespace.nextcloud.metadata[0].name + annotations = { + "resize.topolvm.io/threshold" = "20%" + "resize.topolvm.io/increase" = "20%" + "resize.topolvm.io/storage_limit" = "100Gi" + } } spec { access_modes = ["ReadWriteOnce"] diff --git a/stacks/novelapp/main.tf b/stacks/novelapp/main.tf index 2a65b64f..43dcd97f 100644 --- a/stacks/novelapp/main.tf +++ b/stacks/novelapp/main.tf @@ -50,6 +50,11 @@ resource "kubernetes_persistent_volume_claim" "novelapp-data" { metadata { name = "novelapp-data-proxmox" namespace = kubernetes_namespace.novelapp.metadata[0].name + annotations = { + "resize.topolvm.io/threshold" = "20%" + "resize.topolvm.io/increase" = "100%" + "resize.topolvm.io/storage_limit" = "5Gi" + } } spec { access_modes = ["ReadWriteOnce"] diff 
--git a/stacks/plotting-book/main.tf b/stacks/plotting-book/main.tf index b7cfb4b6..a1e65152 100644 --- a/stacks/plotting-book/main.tf +++ b/stacks/plotting-book/main.tf @@ -50,6 +50,11 @@ resource "kubernetes_persistent_volume_claim" "plotting-book-data" { metadata { name = "plotting-book-data-proxmox" namespace = kubernetes_namespace.plotting-book.metadata[0].name + annotations = { + "resize.topolvm.io/threshold" = "20%" + "resize.topolvm.io/increase" = "100%" + "resize.topolvm.io/storage_limit" = "5Gi" + } } spec { access_modes = ["ReadWriteOnce"] diff --git a/stacks/pvc-autoresizer/main.tf b/stacks/pvc-autoresizer/main.tf new file mode 100644 index 00000000..831b50e3 --- /dev/null +++ b/stacks/pvc-autoresizer/main.tf @@ -0,0 +1,4 @@ +module "pvc_autoresizer" { + source = "./modules/pvc-autoresizer" + tier = local.tiers.cluster +} diff --git a/stacks/pvc-autoresizer/modules/pvc-autoresizer/main.tf b/stacks/pvc-autoresizer/modules/pvc-autoresizer/main.tf new file mode 100644 index 00000000..978a6876 --- /dev/null +++ b/stacks/pvc-autoresizer/modules/pvc-autoresizer/main.tf @@ -0,0 +1,39 @@ +variable "tier" { type = string } + +resource "kubernetes_namespace" "pvc_autoresizer" { + metadata { + name = "pvc-autoresizer" + labels = { + tier = var.tier + } + } +} + +resource "helm_release" "pvc_autoresizer" { + namespace = kubernetes_namespace.pvc_autoresizer.metadata[0].name + create_namespace = false + name = "pvc-autoresizer" + atomic = true + timeout = 300 + + repository = "https://topolvm.github.io/pvc-autoresizer" + chart = "pvc-autoresizer" + + values = [yamlencode({ + controller = { + args = { + prometheusURL = "http://prometheus-server.monitoring.svc.cluster.local:80" + interval = "10m" + } + resources = { + requests = { + memory = "64Mi" + cpu = "10m" + } + limits = { + memory = "128Mi" + } + } + } + })] +} diff --git a/stacks/pvc-autoresizer/terragrunt.hcl b/stacks/pvc-autoresizer/terragrunt.hcl new file mode 100644 index 00000000..4f16dddf --- /dev/null 
+++ b/stacks/pvc-autoresizer/terragrunt.hcl @@ -0,0 +1,8 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency "infra" { + config_path = "../infra" + skip_outputs = true +} diff --git a/stacks/redis/modules/redis/main.tf b/stacks/redis/modules/redis/main.tf index 557b1eb3..1c79cb0e 100644 --- a/stacks/redis/modules/redis/main.tf +++ b/stacks/redis/modules/redis/main.tf @@ -61,6 +61,11 @@ resource "helm_release" "redis" { enabled = true storageClass = "proxmox-lvm" size = "2Gi" + annotations = { + "resize.topolvm.io/threshold" = "20%" + "resize.topolvm.io/increase" = "50%" + "resize.topolvm.io/storage_limit" = "10Gi" + } } resources = { @@ -81,6 +86,11 @@ resource "helm_release" "redis" { enabled = true storageClass = "proxmox-lvm" size = "2Gi" + annotations = { + "resize.topolvm.io/threshold" = "20%" + "resize.topolvm.io/increase" = "50%" + "resize.topolvm.io/storage_limit" = "10Gi" + } } resources = { diff --git a/stacks/vaultwarden/modules/vaultwarden/main.tf b/stacks/vaultwarden/modules/vaultwarden/main.tf index cc9c2199..00708b66 100644 --- a/stacks/vaultwarden/modules/vaultwarden/main.tf +++ b/stacks/vaultwarden/modules/vaultwarden/main.tf @@ -24,6 +24,11 @@ resource "kubernetes_persistent_volume_claim" "vaultwarden_data" { metadata { name = "vaultwarden-data-proxmox" namespace = kubernetes_namespace.vaultwarden.metadata[0].name + annotations = { + "resize.topolvm.io/threshold" = "20%" + "resize.topolvm.io/increase" = "100%" + "resize.topolvm.io/storage_limit" = "5Gi" + } } spec { access_modes = ["ReadWriteOnce"]