From 82c9e69b778bbfc7b07f22072a107869cc7d005a Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Tue, 30 Jun 2026 07:55:18 +0000
Subject: [PATCH] dbaas/mysql: 2Gi InnoDB buffer pool + 6Gi limit + ignore VCT drift

Cut MySQL's write-IOPS footprint on the contended PVE sdc HDD (code-oflt).
Standalone MySQL was the #1 sdc bandwidth writer (~2.8-3.5 MB/s). Live
attribution found ~60% of its writes were nextcloud webcal calendar churn
(throttled separately at the app layer); this addresses write amplification
on the remainder:

- innodb_buffer_pool_size 1Gi -> 2Gi: the pool was too small for the ~5.6Gi
  hot set (Innodb_buffer_pool_wait_free=1.78M = threads stalling for a free
  page -> constant flush-to-make-room write IOPS).
- container memory limit 4Gi -> 6Gi (requests 3->4Gi): the pod was already at
  ~3.7Gi/4Gi (near OOM) with the 1Gi pool, so the 2Gi pool needs the
  headroom. One-time MySQL pod restart to apply.
- ignore_changes on the StatefulSet volume_claim_template: the VCT is
  immutable post-creation and pvc-autoresizer rewrites its annotations on the
  live object, so TF's desired VCT could never apply and errored on every
  broad dbaas apply. Ignoring it (autoresizer owns PVC sizing) removes the
  long-standing need to -target around it.

Applied + verified live: buffer_pool=2.0GiB, limit=6Gi, pod healthy, 24 DBs
reachable, restart clean.

Co-Authored-By: Claude Opus 4.8 --- stacks/dbaas/modules/dbaas/main.tf | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/stacks/dbaas/modules/dbaas/main.tf b/stacks/dbaas/modules/dbaas/main.tf index 0a479e86..db32415c 100644 --- a/stacks/dbaas/modules/dbaas/main.tf +++ b/stacks/dbaas/modules/dbaas/main.tf @@ -111,7 +111,7 @@ resource "kubernetes_config_map" "mysql_standalone_cnf" { innodb_io_capacity=100 innodb_io_capacity_max=200 innodb_redo_log_capacity=1073741824 - innodb_buffer_pool_size=1073741824 + innodb_buffer_pool_size=2147483648 innodb_flush_neighbors=1 innodb_lru_scan_depth=256 innodb_page_cleaners=1 @@ -228,11 +228,13 @@ resource "kubernetes_stateful_set_v1" "mysql_standalone" { resources { requests = { cpu = "250m" - memory = "3Gi" - } - limits = { memory = "4Gi" } + # 6Gi (was 4Gi) — code-oflt 2026-06-30: headroom for the 2Gi InnoDB + # buffer pool (was 1Gi); the pod was already at ~3.7Gi/4Gi (near OOM). + limits = { + memory = "6Gi" + } } volume_mount { @@ -298,7 +300,15 @@ resource "kubernetes_stateful_set_v1" "mysql_standalone" { } lifecycle { - ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1 + ignore_changes = [ + spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1 + # StatefulSet volumeClaimTemplates are immutable post-creation, and the + # pvc-autoresizer rewrites their annotations on the live object + # (storage_limit/threshold), so TF's desired VCT can never apply and a + # broad `dbaas` apply errors out. The autoresizer owns PVC sizing; ignore + # the VCT so other STS changes (e.g. resources) apply cleanly. (code-oflt 2026-06-30) + spec[0].volume_claim_template, + ] } }