# Redis stack: namespace, TLS secret, Sentinel-enabled Redis (Bitnami chart),
# an HAProxy master-only proxy exposed as the "redis" Service, and an hourly
# RDB backup CronJob writing to NFS.

variable "tls_secret_name" {
  description = "Name of the TLS secret, passed to the setup_tls_secret module."
  type        = string
}

variable "tier" {
  description = "Value of the `tier` label applied to the redis namespace."
  type        = string
}

variable "nfs_server" {
  description = "NFS server address, passed to the nfs_volume module for the backup volume."
  type        = string
}

resource "kubernetes_namespace" "redis" {
  metadata {
    name = "redis"
    labels = {
      tier = var.tier
    }
  }
}

module "tls_secret" {
  source          = "../../../../modules/kubernetes/setup_tls_secret"
  namespace       = kubernetes_namespace.redis.metadata[0].name
  tls_secret_name = var.tls_secret_name
}

# Redis with Sentinel HA via Bitnami Helm chart
# Architecture: 1 master + 1 replica + 2 sentinels (one per node)
# Sentinel automatically promotes a replica if master fails
# HAProxy sits in front and routes only to the current master (see below)
resource "helm_release" "redis" {
  namespace        = kubernetes_namespace.redis.metadata[0].name
  create_namespace = false
  name             = "redis"
  atomic           = true
  timeout          = 600

  repository = "oci://10.0.20.10:5000/bitnamicharts"
  chart      = "redis"
  version    = "25.3.2"

  values = [yamlencode({
    architecture = "replication"

    auth = {
      enabled = false
    }

    sentinel = {
      enabled = true
      # NOTE(review): with only two sentinels, quorum = 2 means the loss of a
      # single node leaves one sentinel, which can neither reach quorum nor
      # obtain the majority needed to authorize a failover. Redis recommends
      # at least three sentinels — confirm whether a third can be added.
      quorum          = 2
      masterSet       = "mymaster"
      automateCluster = true

      resources = {
        requests = {
          cpu    = "50m"
          memory = "64Mi"
        }
        limits = {
          memory = "64Mi"
        }
      }
    }

    master = {
      persistence = {
        enabled      = true
        storageClass = "iscsi-truenas"
        size         = "2Gi"
      }

      resources = {
        requests = {
          cpu    = "100m"
          memory = "64Mi"
        }
        limits = {
          memory = "64Mi"
        }
      }
    }

    replica = {
      replicaCount = 2

      persistence = {
        enabled      = true
        storageClass = "iscsi-truenas"
        size         = "2Gi"
      }

      resources = {
        requests = {
          cpu    = "50m"
          memory = "64Mi"
        }
        limits = {
          memory = "64Mi"
        }
      }
    }

    # Metrics for Prometheus
    metrics = {
      enabled = false
    }

    # Use the existing service name so clients don't need changes
    # Sentinel-enabled Bitnami chart creates a headless service
    # and a regular service pointing at the master
    nameOverride = "redis"
  })]
}

# HAProxy-based master-only proxy for simple redis:// clients.
# Health-checks each Redis node via INFO replication and only routes
# to the current master. On Sentinel failover, HAProxy detects the
# new master within seconds via its health check interval.
# Previously this was a K8s Service that routed to all nodes, causing
# READONLY errors when clients hit a replica.
#
# The `resolvers` section makes HAProxy re-resolve the pod hostnames at
# runtime. Without it, names are resolved once at startup, so a Redis pod
# that is rescheduled with a new IP would stay marked down until HAProxy
# itself is restarted. `init-addr last,libc,none` lets HAProxy start even if
# a pod hostname is temporarily unresolvable.
resource "kubernetes_config_map" "haproxy" {
  metadata {
    name      = "redis-haproxy"
    namespace = kubernetes_namespace.redis.metadata[0].name
  }

  data = {
    "haproxy.cfg" = <<-EOT
      global
        maxconn 256

      resolvers kubedns
        parse-resolv-conf
        resolve_retries 3
        timeout resolve 1s
        timeout retry 1s
        hold valid 5s

      defaults
        mode tcp
        timeout connect 5s
        timeout client 30s
        timeout server 30s
        timeout check 3s

      frontend redis_front
        bind *:6379
        default_backend redis_master

      frontend sentinel_front
        bind *:26379
        default_backend redis_sentinel

      backend redis_master
        option tcp-check
        tcp-check connect
        tcp-check send "PING\r\n"
        tcp-check expect string +PONG
        tcp-check send "INFO replication\r\n"
        tcp-check expect string role:master
        tcp-check send "QUIT\r\n"
        tcp-check expect string +OK
        server redis-node-0 redis-node-0.redis-headless.redis.svc.cluster.local:6379 check inter 3s fall 3 rise 2 resolvers kubedns init-addr last,libc,none
        server redis-node-1 redis-node-1.redis-headless.redis.svc.cluster.local:6379 check inter 3s fall 3 rise 2 resolvers kubedns init-addr last,libc,none

      backend redis_sentinel
        balance roundrobin
        server redis-node-0 redis-node-0.redis-headless.redis.svc.cluster.local:26379 check inter 5s resolvers kubedns init-addr last,libc,none
        server redis-node-1 redis-node-1.redis-headless.redis.svc.cluster.local:26379 check inter 5s resolvers kubedns init-addr last,libc,none
    EOT
  }
}

# HAProxy pods that front Redis (6379) and Sentinel (26379).
resource "kubernetes_deployment" "haproxy" {
  metadata {
    name      = "redis-haproxy"
    namespace = kubernetes_namespace.redis.metadata[0].name
    labels = {
      app = "redis-haproxy"
    }
  }

  spec {
    replicas = 2

    selector {
      match_labels = {
        app = "redis-haproxy"
      }
    }

    template {
      metadata {
        labels = {
          app = "redis-haproxy"
        }
        # Roll the pods whenever haproxy.cfg changes; a ConfigMap update on
        # its own does not restart the running HAProxy processes.
        annotations = {
          "checksum/haproxy-config" = sha256(kubernetes_config_map.haproxy.data["haproxy.cfg"])
        }
      }

      spec {
        container {
          name  = "haproxy"
          image = "docker.io/library/haproxy:3.1-alpine"

          port {
            container_port = 6379
            name           = "redis"
          }

          port {
            container_port = 26379
            name           = "sentinel"
          }

          volume_mount {
            name       = "config"
            mount_path = "/usr/local/etc/haproxy"
            read_only  = true
          }

          resources {
            requests = {
              cpu    = "10m"
              memory = "16Mi"
            }
            limits = {
              memory = "16Mi"
            }
          }

          liveness_probe {
            tcp_socket {
              port = 6379
            }
            initial_delay_seconds = 5
            period_seconds        = 10
          }
        }

        volume {
          name = "config"
          config_map {
            name = kubernetes_config_map.haproxy.metadata[0].name
          }
        }
      }
    }
  }

  # The Redis node hostnames referenced in haproxy.cfg are created by the chart.
  depends_on = [helm_release.redis]
}

# Stable "redis" Service; selects the HAProxy pods rather than Redis directly.
resource "kubernetes_service" "redis" {
  metadata {
    name      = "redis"
    namespace = kubernetes_namespace.redis.metadata[0].name
  }

  spec {
    selector = {
      app = "redis-haproxy"
    }

    port {
      name        = "tcp-redis"
      port        = 6379
      target_port = 6379
    }

    port {
      name        = "tcp-sentinel"
      port        = 26379
      target_port = 26379
    }
  }

  depends_on = [kubernetes_deployment.haproxy]
}

module "nfs_backup" {
  source     = "../../../../modules/kubernetes/nfs_volume"
  name       = "redis-backup"
  namespace  = kubernetes_namespace.redis.metadata[0].name
  nfs_server = var.nfs_server
  nfs_path   = "/mnt/main/redis-backup"
}

# Hourly backup: copy RDB snapshot from master to NFS
resource "kubernetes_cron_job_v1" "redis-backup" {
  metadata {
    name      = "redis-backup"
    namespace = kubernetes_namespace.redis.metadata[0].name
  }

  spec {
    concurrency_policy            = "Replace"
    failed_jobs_history_limit     = 3
    schedule                      = "0 * * * *"
    starting_deadline_seconds     = 10
    successful_jobs_history_limit = 3

    job_template {
      metadata {}

      spec {
        backoff_limit              = 2
        ttl_seconds_after_finished = 60

        template {
          metadata {}

          spec {
            container {
              name  = "redis-backup"
              image = "redis:7-alpine"

              # The dump is written to a ".partial" file and renamed only after
              # redis-cli succeeds, so an interrupted transfer never leaves a
              # truncated file that looks like a valid redis-*.rdb backup.
              command = ["/bin/sh", "-c", <<-EOT
                set -eux
                TIMESTAMP=$(date +%Y%m%d-%H%M)
                OUT="/backup/redis-$TIMESTAMP.rdb"
                # Remove a leftover partial dump if anything below fails
                trap 'rm -f "$OUT.partial"' EXIT
                # Trigger a fresh RDB save on the master
                redis-cli -h redis.redis BGSAVE
                sleep 5
                # Copy the RDB via redis-cli --rdb
                redis-cli -h redis.redis --rdb "$OUT.partial"
                mv "$OUT.partial" "$OUT"
                # Rotate — 7-day retention
                find /backup -name 'redis-*.rdb' -type f -mtime +7 -delete
                echo "Backup complete: redis-$TIMESTAMP.rdb"
              EOT
              ]

              volume_mount {
                name       = "backup"
                mount_path = "/backup"
              }
            }

            volume {
              name = "backup"
              persistent_volume_claim {
                claim_name = module.nfs_backup.claim_name
              }
            }
          }
        }
      }
    }
  }
}