diff --git a/stacks/dbaas/modules/dbaas/main.tf b/stacks/dbaas/modules/dbaas/main.tf index 31896be8..a7fa3cc1 100644 --- a/stacks/dbaas/modules/dbaas/main.tf +++ b/stacks/dbaas/modules/dbaas/main.tf @@ -83,301 +83,6 @@ module "tls_secret" { tls_secret_name = var.tls_secret_name } - -#### MYSQL — InnoDB Cluster via MySQL Operator -# -# 3 MySQL servers with Group Replication + 1 MySQL Router for auto-failover. -# Operator installed in mysql-operator namespace (toleration for control-plane). -# Init containers are slow (~20 min each) due to mysqlsh plugin loading. - -resource "kubernetes_namespace" "mysql_operator" { - metadata { - name = "mysql-operator" - labels = { - tier = "1-cluster" - } - } -} - -resource "helm_release" "mysql_operator" { - namespace = kubernetes_namespace.mysql_operator.metadata[0].name - create_namespace = false - name = "mysql-operator" - timeout = 300 - - repository = "https://mysql.github.io/mysql-operator/" - chart = "mysql-operator" - version = "2.2.7" - - # NOTE: The mysql-operator chart (2.2.7) does NOT expose a resources values key. - # The resources block below is ignored by the chart. Without explicit resources - # on the deployment, the LimitRange default (256Mi) applies silently. - # Fix: kubectl patch deployment mysql-operator -n mysql-operator --type=json \ - # -p='[{"op":"replace","path":"/spec/template/spec/containers/0/resources","value":{"requests":{"cpu":"100m","memory":"256Mi"},"limits":{"memory":"512Mi"}}}]' - values = [yamlencode({ - resources = { - requests = { - cpu = "100m" - memory = "256Mi" - } - limits = { - memory = "512Mi" - } - } - })] -} - -# The mysql-sidecar ClusterRole created by the Helm chart is missing -# namespace and CRD list/watch permissions needed by the kopf framework -# in the sidecar container. Without these, the sidecar enters degraded -# mode and never completes InnoDB cluster join operations. -resource "kubernetes_cluster_role" "mysql_sidecar_extra" { - metadata { - name = "mysql-sidecar-extra" - } - rule { - api_groups = [""] - resources = ["namespaces"] - verbs = ["list", "watch"] - } - rule { - api_groups = ["apiextensions.k8s.io"] - resources = ["customresourcedefinitions"] - verbs = ["list", "watch"] - } -} - -resource "kubernetes_cluster_role_binding" "mysql_sidecar_extra" { - metadata { - name = "mysql-sidecar-extra" - } - role_ref { - api_group = "rbac.authorization.k8s.io" - kind = "ClusterRole" - name = kubernetes_cluster_role.mysql_sidecar_extra.metadata[0].name - } - subject { - kind = "ServiceAccount" - name = "mysql-cluster-sa" - namespace = kubernetes_namespace.dbaas.metadata[0].name - } -} - -# ConfigMap for MySQL extra config — mounted as subPath over 99-extra.cnf -# This is the only reliable way to persist innodb_doublewrite=OFF because: -# - spec.mycnf only applies on initial cluster creation -# - The operator's initconf container overwrites 99-extra.cnf on every pod start -# - SET PERSIST doesn't support innodb_doublewrite (static variable) -resource "kubernetes_config_map" "mysql_extra_cnf" { - metadata { - name = "mysql-extra-cnf" - namespace = kubernetes_namespace.dbaas.metadata[0].name - } - data = { - "99-extra.cnf" = <<-EOT - [mysqld] - innodb_doublewrite=OFF - EOT - } -} - -resource "helm_release" "mysql_cluster" { - namespace = kubernetes_namespace.dbaas.metadata[0].name - create_namespace = false - name = "mysql-cluster" - timeout = 900 - - repository = "https://mysql.github.io/mysql-operator/" - chart = "mysql-innodbcluster" - version = "2.2.7" - - values = [yamlencode({ - serverInstances = 1 - routerInstances = 1 - serverVersion = "8.4.4" - - credentials = { - root = { - user = "root" - password = var.dbaas_root_password - host = "%" - } - } - - tls = { - useSelfSigned = true - } - - datadirVolumeClaimTemplate = { - storageClassName = "proxmox-lvm-encrypted" - metadata = { - annotations = { - "resize.topolvm.io/threshold" = "80%" - "resize.topolvm.io/increase" = "20%" - "resize.topolvm.io/storage_limit" = "100Gi" - } - } - resources = { - requests = { - storage = "30Gi" - } - } - } - - serverConfig = { - mycnf = <<-EOT - [mysqld] - skip-name-resolve - mysql-native-password=ON - # Auto-recovery after crashes: rejoin group without manual intervention - group_replication_autorejoin_tries=2016 - group_replication_exit_state_action=OFFLINE_MODE - group_replication_member_expel_timeout=30 - group_replication_unreachable_majority_timeout=60 - group_replication_start_on_boot=ON - # Cap XCom cache to prevent unbounded growth (default 1GB causes OOM) - group_replication_message_cache_size=134217728 - # Reduce log buffer (16MB sufficient for this workload, was 64MB) - innodb_log_buffer_size=16777216 - # Limit connections (peak usage ~40, no need for 151) - max_connections=80 - # --- Disk write reduction (HDD/LVM thin) --- - # Flush redo log once per second, not per commit. Up to 1s data loss on MySQL crash, - # but group replication provides redundancy across 3 nodes. - innodb_flush_log_at_trx_commit=0 - # OS decides when to flush binlog (not per commit) - sync_binlog=0 - # HDD-tuned I/O capacity (default 200/2000 is for SSD) - innodb_io_capacity=100 - innodb_io_capacity_max=200 - # 1GB redo log capacity — larger log means less frequent checkpoint flushes - innodb_redo_log_capacity=1073741824 - # 1GB buffer pool - innodb_buffer_pool_size=1073741824 - # Disable doublewrite — halves write amplification. Safe with group replication - # (crashed node can re-clone from healthy replica rather than relying on local recovery) - innodb_doublewrite=OFF - # Flush neighbors on HDD (coalesce adjacent dirty pages into single I/O) - innodb_flush_neighbors=1 - # Reduce page cleaner aggressiveness - innodb_lru_scan_depth=256 - innodb_page_cleaners=1 - # Reduce adaptive flushing — let dirty pages accumulate longer before background flush - innodb_adaptive_flushing_lwm=10 - innodb_max_dirty_pages_pct=90 - innodb_max_dirty_pages_pct_lwm=10 - EOT - } - - # Top-level resources apply to SIDECAR container - # VPA shows sidecar needs only 248Mi target / 334Mi upper bound - # Setting to 350Mi (was 2Gi/4Gi - 17× over-provisioned) - resources = { - requests = { - cpu = "250m" - memory = "350Mi" - } - limits = { - memory = "350Mi" - } - } - - podSpec = { - affinity = { - nodeAffinity = { - requiredDuringSchedulingIgnoredDuringExecution = { - nodeSelectorTerms = [{ - matchExpressions = [{ - key = "kubernetes.io/hostname" - operator = "NotIn" - values = ["k8s-node1"] - }] - }] - } - } - podAntiAffinity = { - preferredDuringSchedulingIgnoredDuringExecution = [{ - weight = 100 - podAffinityTerm = { - labelSelector = { - matchLabels = { - "component" = "mysqld" - } - } - topologyKey = "kubernetes.io/hostname" - } - }] - } - } - # Container-specific resources for MYSQL container - # VPA shows 2.98Gi target / 5.26Gi upper bound - # Current usage ~1.8Gi peak. Reducing limit from 4Gi to 3Gi - containers = [ - { - name = "mysql" - resources = { - requests = { - memory = "2Gi" - cpu = "250m" - } - limits = { - memory = "3Gi" - } - } - }, - { - # MySQL operator sidecar (kopf Python control loop) - # VPA upper bound: 334Mi. Was 6Gi limit — 17× over-provisioned. - name = "sidecar" - resources = { - requests = { - memory = "350Mi" - cpu = "50m" - } - limits = { - memory = "512Mi" - } - } - } - ] - initContainers = [ - { - name = "fixdatadir" - resources = { - requests = { memory = "64Mi", cpu = "25m" } - limits = { memory = "64Mi" } - } - }, - { - name = "initconf" - resources = { - requests = { memory = "256Mi", cpu = "50m" } - limits = { memory = "256Mi" } - } - }, - { - name = "initmysql" - resources = { - requests = { memory = "512Mi", cpu = "250m" } - limits = { memory = "512Mi" } - } - } - ] - } - - # MySQL Router - explicitly set resources (chart does not expose router.resources) - # VPA shows 100Mi upper bound, setting to 128Mi - # Note: This requires manual kubectl patch after helm release: - # kubectl patch deployment mysql-cluster-router -n dbaas --type=json -p='[ - # {"op": "replace", "path": "/spec/template/spec/containers/0/resources", - # "value": {"requests": {"cpu": "25m", "memory": "128Mi"}, "limits": {"memory": "128Mi"}}}]' - # TODO: migrate to mysql-operator fork or wait for upstream router.resources support - - })] - - depends_on = [helm_release.mysql_operator] -} - #### MYSQL — Standalone (migration target) # # Standalone MySQL without Group Replication. Eliminates ~95 GB/day of GR