# DB as a service. Installs MySQL operator

# ---------------------------------------------------------------------------
# Inputs
# ---------------------------------------------------------------------------

# Name of the TLS secret passed to the setup_tls_secret module below.
variable "tls_secret_name" {
  type = string
}

# Value of the `tier` label stamped on the dbaas namespace.
variable "tier" {
  type = string
}

# Database root password.
# NOTE(review): the consumer is not visible in this section — presumably it
# feeds the `cluster-password` secret read by the MySQL StatefulSet; confirm.
# Marked sensitive so Terraform redacts it from plan/apply output, consistent
# with the mysql_*_password variables below.
variable "dbaas_root_password" {
  type      = string
  sensitive = true
}

# Name of the Service that clients use to reach MySQL (used as the name of
# kubernetes_service.mysql).
variable "cluster_master_service" {
  type    = string
  default = "mysql"
}

# PostgreSQL superuser password. Sensitive: redacted from plan/apply output.
variable "postgresql_root_password" {
  type      = string
  sensitive = true
}

# pgAdmin login password. Sensitive: redacted from plan/apply output.
variable "pgadmin_password" {
  type      = string
  sensitive = true
}

variable "prod" {
  type    = bool
  default = false
}

variable "nfs_server" {
  type = string
}

variable "kube_config_path" {
  type      = string
  sensitive = true
}

# MySQL static application users (not rotated by Vault DB engine; baked into
# each app's config). Codified here so future MySQL rebuilds cannot silently
# drop them.
variable "mysql_forgejo_password" {
  type      = string
  sensitive = true
}

variable "mysql_roundcubemail_password" {
  type      = string
  sensitive = true
}

# ---------------------------------------------------------------------------
# Namespace and resource governance
# ---------------------------------------------------------------------------

# Namespace that holds every database workload defined in this file.
resource "kubernetes_namespace" "dbaas" {
  metadata {
    name = "dbaas"
    labels = {
      tier = var.tier
      # NOTE(review): presumably tells the governance policy that this
      # namespace ships its own LimitRange/ResourceQuota — confirm.
      "resource-governance/custom-quota" = "true"
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
}

# Overrides the Kyverno tier-1-cluster LimitRange (max 4Gi) for this namespace.
# The per-container memory ceiling set here is 8Gi; the original note cited a
# 6Gi MySQL limit as the motivation for raising it.
resource "kubernetes_limit_range" "dbaas" {
  metadata {
    name      = "tier-defaults"
    namespace = kubernetes_namespace.dbaas.metadata[0].name
  }

  spec {
    limit {
      type = "Container"

      # Applied to containers that do not declare their own limit/request.
      default = {
        memory = "256Mi"
      }
      default_request = {
        cpu    = "50m"
        memory = "256Mi"
      }

      # Hard per-container ceiling.
      max = {
        memory = "8Gi"
      }
    }
  }
}

# Namespace-wide aggregate budget (CPU/memory requests, memory limits, pods).
resource "kubernetes_resource_quota" "dbaas" {
  metadata {
    name      = "dbaas-quota"
    namespace = kubernetes_namespace.dbaas.metadata[0].name
  }

  spec {
    hard = {
      "requests.cpu"    = "8"
      "requests.memory" = "40Gi"
      "limits.memory"   = "40Gi"
      pods              = "30"
    }
  }
}

# Provisions the TLS secret inside the dbaas namespace.
module "tls_secret" {
  source          = "../../../../modules/kubernetes/setup_tls_secret"
  namespace       = kubernetes_namespace.dbaas.metadata[0].name
  tls_secret_name = var.tls_secret_name
}

#### MYSQL — Standalone (migration target)
#
# Standalone MySQL without Group Replication.
# Eliminates ~95 GB/day of GR
# write overhead (binlog, relay log, XCom cache) for databases totaling ~35 MB.
# Binary logging disabled entirely (skip-log-bin) since no replication needed.
# Uses official mysql:8.4 image (Bitnami images deprecated by Broadcom Aug 2025).

# Server configuration for the standalone instance. The ConfigMap is mounted
# read-only at /etc/mysql/conf.d by the StatefulSet below.
# Byte values: innodb_log_buffer_size = 16 MiB; innodb_redo_log_capacity and
# innodb_buffer_pool_size = 1 GiB each.
resource "kubernetes_config_map" "mysql_standalone_cnf" {
  metadata {
    name      = "mysql-standalone-cnf"
    namespace = kubernetes_namespace.dbaas.metadata[0].name
  }

  data = {
    "standalone.cnf" = <<-EOT
      [mysqld]
      skip-name-resolve
      mysql-native-password=ON
      skip-log-bin
      max_connections=80
      innodb_log_buffer_size=16777216
      innodb_flush_log_at_trx_commit=2
      innodb_io_capacity=100
      innodb_io_capacity_max=200
      innodb_redo_log_capacity=1073741824
      innodb_buffer_pool_size=1073741824
      innodb_flush_neighbors=1
      innodb_lru_scan_depth=256
      innodb_page_cleaners=1
      innodb_adaptive_flushing_lwm=10
      innodb_max_dirty_pages_pct=90
      innodb_max_dirty_pages_pct_lwm=10
    EOT
  }
}

# Single-replica MySQL server with a persistent data volume.
resource "kubernetes_stateful_set_v1" "mysql_standalone" {
  metadata {
    name      = "mysql-standalone"
    namespace = kubernetes_namespace.dbaas.metadata[0].name
    labels = {
      "app.kubernetes.io/name"      = "mysql"
      "app.kubernetes.io/instance"  = "mysql-standalone"
      "app.kubernetes.io/component" = "primary"
    }
  }

  spec {
    service_name = "mysql-standalone"
    replicas     = 1

    selector {
      match_labels = {
        "app.kubernetes.io/instance"  = "mysql-standalone"
        "app.kubernetes.io/component" = "primary"
      }
    }

    template {
      metadata {
        labels = {
          "app.kubernetes.io/name"      = "mysql"
          "app.kubernetes.io/instance"  = "mysql-standalone"
          "app.kubernetes.io/component" = "primary"
        }
      }

      spec {
        # Hard requirement: never schedule onto nodes labelled
        # nvidia.com/gpu.present=true.
        affinity {
          node_affinity {
            required_during_scheduling_ignored_during_execution {
              node_selector_term {
                match_expressions {
                  key      = "nvidia.com/gpu.present"
                  operator = "NotIn"
                  values   = ["true"]
                }
              }
            }
          }
        }

        container {
          name  = "mysql"
          image = "mysql:8.4"

          port {
            container_port = 3306
            name           = "mysql"
          }

          # Root password is read from the cluster-password secret rather
          # than being inlined into the pod spec.
          env {
            name = "MYSQL_ROOT_PASSWORD"
            value_from {
              secret_key_ref {
                name = kubernetes_secret.cluster-password.metadata[0].name
                key  = "ROOT_PASSWORD"
              }
            }
          }

          resources {
            requests = {
              cpu    = "250m"
              memory = "3Gi"
            }
            limits = {
              memory = "4Gi"
            }
          }

          volume_mount {
            name       = "data"
            mount_path = "/var/lib/mysql"
          }

          volume_mount {
            name       = "config"
            mount_path = "/etc/mysql/conf.d"
            read_only  = true
          }

          # Startup gate: liveness/readiness checks are held back until this
          # probe succeeds once. Without it the liveness probe allows only
          # ~60s (30s delay + 3 x 10s) before the kubelet kills mysqld, which
          # can turn a long InnoDB crash recovery (1 GiB redo log, low
          # io_capacity) into a restart loop. Budget: 30 x 10s = 5 minutes.
          startup_probe {
            exec {
              command = ["mysqladmin", "ping", "-h", "localhost"]
            }
            period_seconds    = 10
            timeout_seconds   = 5
            failure_threshold = 30
          }

          # Restart the container when mysqld stops answering pings.
          liveness_probe {
            exec {
              command = ["mysqladmin", "ping", "-h", "localhost"]
            }
            initial_delay_seconds = 30
            period_seconds        = 10
            timeout_seconds       = 5
            failure_threshold     = 3
          }

          # Remove the pod from Service endpoints while mysqld is not
          # answering pings.
          readiness_probe {
            exec {
              command = ["mysqladmin", "ping", "-h", "localhost"]
            }
            initial_delay_seconds = 10
            period_seconds        = 10
            timeout_seconds       = 5
            failure_threshold     = 3
          }
        }

        volume {
          name = "config"
          config_map {
            name = kubernetes_config_map.mysql_standalone_cnf.metadata[0].name
          }
        }
      }
    }

    # Per-pod data volume (mounted at /var/lib/mysql).
    volume_claim_template {
      metadata {
        name = "data"
        # NOTE(review): presumably consumed by a PVC auto-resizer (grow by
        # 100% when free space drops below 10%, capped at 50Gi) — confirm
        # the controller is deployed for this storage class.
        annotations = {
          "resize.topolvm.io/threshold"     = "10%"
          "resize.topolvm.io/increase"      = "100%"
          "resize.topolvm.io/storage_limit" = "50Gi"
        }
      }

      spec {
        access_modes       = ["ReadWriteOnce"]
        storage_class_name = "proxmox-lvm-encrypted"
        resources {
          requests = {
            storage = "5Gi"
          }
        }
      }
    }
  }

  lifecycle {
    ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }
}

# Compatibility service: mysql.dbaas.svc.cluster.local:3306
# Points at standalone MySQL (migrated from InnoDB Cluster 2026-04-16)
resource "kubernetes_service" "mysql" {
  metadata {
    name      = var.cluster_master_service
    namespace = kubernetes_namespace.dbaas.metadata[0].name
  }

  spec {
    # Must match the pod labels of kubernetes_stateful_set_v1.mysql_standalone.
    selector = {
      "app.kubernetes.io/instance"  = "mysql-standalone"
      "app.kubernetes.io/component" = "primary"
    }

    port {
      port        = 3306
      target_port = 3306
    }
  }

  # Create the Service only after the StatefulSet exists.
  depends_on = [kubernetes_stateful_set_v1.mysql_standalone]
}

# MySQL static application users — not rotated by Vault DB engine.
# Each app stores its password in its own config (forgejo app.ini, roundcube
# ROUNDCUBEMAIL_DB_PASSWORD env). During the 2026-04-16 InnoDB Cluster →
# standalone migration these users were accidentally dropped and recreated with
# mismatched passwords; this block codifies them so a future rebuild cannot
# silently break the apps.
# # Pattern matches `null_resource.pg_terraform_state_db` below (local-exec into # the DB pod). We CREATE IF NOT EXISTS + ALTER USER on every apply so a # password rotation in Vault is re-synced on the next `scripts/tg apply`. The # `password_hash` trigger re-runs the provisioner when the Vault password # changes; the namespace/user triggers re-run if identifiers change. locals { mysql_static_users = { forgejo = { database = "forgejo" password = var.mysql_forgejo_password } roundcubemail = { database = "roundcubemail" password = var.mysql_roundcubemail_password } } } resource "null_resource" "mysql_static_user" { for_each = local.mysql_static_users depends_on = [kubernetes_stateful_set_v1.mysql_standalone] triggers = { username = each.key database = each.value.database password_hash = sha256(each.value.password) } provisioner "local-exec" { command = </dev/null || echo 0) _wb0=$(awk '/^write_bytes/{print $2}' /proc/$$/io 2>/dev/null || echo 0) export now=$(date +"%Y_%m_%d_%H_%M") mysqldump --all-databases -u root --host mysql.dbaas.svc.cluster.local | gzip -9 > /backup/dump_$now.sql.gz # Rotate — 14 day retention cd /backup find . -name "dump_*.sql.gz" -type f -mtime +14 -delete find . 
-name "dump_*.sql" -type f -mtime +14 -delete # clean up old uncompressed _dur=$(($(date +%s) - _t0)) _rb1=$(awk '/^read_bytes/{print $2}' /proc/$$/io 2>/dev/null || echo 0) _wb1=$(awk '/^write_bytes/{print $2}' /proc/$$/io 2>/dev/null || echo 0) echo "=== Backup IO Stats ===" echo "duration: $${_dur}s" echo "read: $(( (_rb1 - _rb0) / 1048576 )) MiB" echo "written: $(( (_wb1 - _wb0) / 1048576 )) MiB" echo "output: $(ls -lh /backup/dump_$now.sql.gz | awk '{print $5}')" _out_bytes=$(stat -c%s /backup/dump_$now.sql.gz) curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/mysql-backup" < kb port-forward mysql-647cfd4969-46rmw --address 0.0.0.0 3307:3306 # run mysql import (and specify non-localhost address to avoid using unix socket): (password is in tfvars) # > mysql -u root -p --host 10.0.10.10 --port 3307 < /mnt/nfs/2024_01_06_13_54.sql volume_mount { name = "mysql-backup" mount_path = "/backup" } } volume { name = "mysql-backup" persistent_volume_claim { claim_name = module.nfs_mysql_backup_host.claim_name } } } } } } } lifecycle { # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2 ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config] } } # Per-database MySQL backups (enables single-database restore without affecting others) resource "kubernetes_cron_job_v1" "mysql-backup-per-db" { metadata { name = "mysql-backup-per-db" namespace = kubernetes_namespace.dbaas.metadata[0].name } spec { concurrency_policy = "Replace" failed_jobs_history_limit = 3 schedule = "45 0 * * *" starting_deadline_seconds = 10 successful_jobs_history_limit = 3 job_template { metadata {} spec { backoff_limit = 3 ttl_seconds_after_finished = 10 template { metadata {} spec { container { name = "mysql-backup-per-db" image = "docker.io/library/mysql:8.0" env { name = "MYSQL_PWD" value_from { secret_key_ref { name = "cluster-secret" key = "ROOT_PASSWORD" } } } command = ["/bin/bash", "-c", <<-EOT 
set -euo pipefail _t0=$(date +%s) now=$(date +"%Y_%m_%d_%H_%M") MYSQL_HOST=mysql.dbaas.svc.cluster.local failed=0 total=0 ok=0 # Discover all user databases dbs=$(mysql -u root --host $MYSQL_HOST -N -e \ "SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT IN ('mysql','information_schema','performance_schema','sys','mysql_innodb_cluster_metadata');") for db in $dbs; do total=$((total + 1)) mkdir -p /backup/per-db/$db echo "=== Backing up $db ===" if mysqldump -u root --host $MYSQL_HOST --single-transaction --set-gtid-purged=OFF "$db" | gzip -9 > "/backup/per-db/$db/dump_$now.sql.gz"; then _size=$(stat -c%s "/backup/per-db/$db/dump_$now.sql.gz") echo " OK — $(( _size / 1024 )) KiB" ok=$((ok + 1)) else echo " FAILED" rm -f "/backup/per-db/$db/dump_$now.sql.gz" failed=$((failed + 1)) fi done # Rotate — 14 day retention per database find /backup/per-db -name "dump_*.sql.gz" -type f -mtime +14 -delete _dur=$(($(date +%s) - _t0)) echo "=== Per-DB Backup Summary ===" echo "databases: $total (ok: $ok, failed: $failed)" echo "duration: $${_dur}s" curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/mysql-backup-per-db" </dev/null 2>&1 || true _t0=$(date +%s) _rb0=$(awk '/^read_bytes/{print $2}' /proc/$$/io 2>/dev/null || echo 0) _wb0=$(awk '/^write_bytes/{print $2}' /proc/$$/io 2>/dev/null || echo 0) export now=$(date +"%Y_%m_%d_%H_%M") PGPASSWORD=$PGPASSWORD pg_dumpall -h pg-cluster-rw.dbaas -U postgres | gzip -9 > /backup/dump_$now.sql.gz # Rotate — 14 day retention cd /backup find . -name "dump_*.sql.gz" -type f -mtime +14 -delete find . 
-name "dump_*.sql" -type f -mtime +14 -delete # clean up old uncompressed _dur=$(($(date +%s) - _t0)) _rb1=$(awk '/^read_bytes/{print $2}' /proc/$$/io 2>/dev/null || echo 0) _wb1=$(awk '/^write_bytes/{print $2}' /proc/$$/io 2>/dev/null || echo 0) echo "=== Backup IO Stats ===" echo "duration: $${_dur}s" echo "read: $(( (_rb1 - _rb0) / 1048576 )) MiB" echo "written: $(( (_wb1 - _wb0) / 1048576 )) MiB" echo "output: $(ls -lh /backup/dump_$now.sql.gz | awk '{print $5}')" _out_bytes=$(stat -c%s /backup/dump_$now.sql.gz) curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/postgresql-backup" </dev/null 2>&1 || true _t0=$(date +%s) now=$(date +"%Y_%m_%d_%H_%M") PGHOST=pg-cluster-rw.dbaas PGUSER=postgres failed=0 total=0 ok=0 # Discover all user databases dbs=$(PGPASSWORD=$PGPASSWORD psql -h $PGHOST -U $PGUSER -t -A -c \ "SELECT datname FROM pg_database WHERE datistemplate = false AND datname != 'postgres' ORDER BY datname;") for db in $dbs; do total=$((total + 1)) mkdir -p /backup/per-db/$db echo "=== Backing up $db ===" if PGPASSWORD=$PGPASSWORD pg_dump -Fc -h $PGHOST -U $PGUSER "$db" > "/backup/per-db/$db/dump_$now.dump"; then _size=$(stat -c%s "/backup/per-db/$db/dump_$now.dump") echo " OK — $(( _size / 1024 )) KiB" ok=$((ok + 1)) else echo " FAILED" rm -f "/backup/per-db/$db/dump_$now.dump" failed=$((failed + 1)) fi done # Rotate — 14 day retention per database find /backup/per-db -name "dump_*.dump" -type f -mtime +14 -delete _dur=$(($(date +%s) - _t0)) echo "=== Per-DB Backup Summary ===" echo "databases: $total (ok: $ok, failed: $failed)" echo "duration: $${_dur}s" curl -sf --data-binary @- "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/postgresql-backup-per-db" <