fix: right-size service memory after PVE RAM upgrade (142→272GB)

- MySQL InnoDB: 2Gi/4Gi → 3Gi/6Gi (was at 97% of limit)
- Redis HAProxy: 16Mi/16Mi → 32Mi/64Mi (OOMKilled)
- Plotting-book: 64Mi/64Mi → 128Mi/256Mi (OOMKilled)
- Tandoor: 256Mi/256Mi → 384Mi/512Mi (60 OOM restarts), re-enabled
- Navidrome: 128Mi/128Mi → 256Mi/384Mi
- Matrix: add explicit 256Mi/512Mi resources
- Trading-bot workers: 64Mi/64Mi → 128Mi/256Mi, re-enabled
- Tier 3-edge defaults: 96Mi/192Mi → 128Mi/256Mi
- Fallback tier defaults: 128Mi/128Mi → 128Mi/192Mi, max 2→4Gi
- Mailserver: disable rspamd-redis, fix Roundcube IPv6/IMAP, bump dovecot connections
This commit is contained in:
Viktor Barzin 2026-04-05 23:02:50 +03:00
parent 825adc4a67
commit 4da8f0242f
10 changed files with 113 additions and 98 deletions

View file

@ -35,8 +35,8 @@ resource "kubernetes_resource_quota" "dbaas" {
spec {
hard = {
"requests.cpu" = "8"
"requests.memory" = "27Gi"
"limits.memory" = "27Gi"
"requests.memory" = "32Gi"
"limits.memory" = "32Gi"
pods = "30"
}
}
@ -228,11 +228,11 @@ resource "helm_release" "mysql_cluster" {
name = "mysql"
resources = {
requests = {
memory = "2Gi"
memory = "3Gi"
cpu = "250m"
}
limits = {
memory = "4Gi"
memory = "6Gi"
}
}
}]

View file

@ -315,14 +315,14 @@ resource "kubernetes_manifest" "generate_limitrange_by_tier" {
{
type = "Container"
default = {
memory = "192Mi"
memory = "256Mi"
}
defaultRequest = {
cpu = "50m"
memory = "96Mi"
memory = "128Mi"
}
max = {
memory = "4Gi"
memory = "8Gi"
}
}
]
@ -433,14 +433,14 @@ resource "kubernetes_manifest" "generate_limitrange_by_tier" {
{
type = "Container"
default = {
memory = "128Mi"
memory = "192Mi"
}
defaultRequest = {
cpu = "50m"
memory = "128Mi"
}
max = {
memory = "2Gi"
memory = "4Gi"
}
}
]

View file

@ -116,6 +116,10 @@ resource "kubernetes_config_map" "mailserver_config" {
}
}
EOF
# Increase max IMAP connections per user+IP - all Roundcube connections come from same pod IP
"dovecot.cf" = <<-EOF
mail_max_userip_connections = 50
EOF
fail2ban_conf = <<-EOF
[DEFAULT]
@ -286,12 +290,12 @@ resource "kubernetes_deployment" "mailserver" {
sub_path = "fetchmail.cf"
read_only = true
}
# volume_mount {
# name = "config"
# mount_path = "/tmp/docker-mailserver/dovecot.cf"
# sub_path = "dovecot.cf"
# read_only = true
# }
volume_mount {
name = "config"
mount_path = "/tmp/docker-mailserver/dovecot.cf"
sub_path = "dovecot.cf"
read_only = true
}
# volume_mount {
# name = "user-patches"
# mount_path = "/tmp/user-patches.sh"

View file

@ -20,30 +20,40 @@ module "nfs_roundcube_enigma" {
nfs_path = "/mnt/main/roundcubemail/enigma"
}
# Roundcube settings overrides, mounted into /var/roundcube/config by the
# roundcubemail deployment below.
# For mounting details see https://github.com/roundcube/roundcubemail-docker?tab=readme-ov-file
# For the full list of overridable settings see
# https://github.com/roundcube/roundcubemail/blob/master/config/defaults.inc.php
resource "kubernetes_config_map" "roundcubemail_config" {
  metadata {
    name      = "roundcubemail.config"
    namespace = "mailserver"
    labels = {
      app = "roundcubemail"
    }
    annotations = {
      # Let Reloader roll the deployment whenever this config changes.
      "reloader.stakater.com/match" = "true"
    }
  }
  data = {
    # Disable TLS peer verification for internal service name connections
    # The mailserver cert is issued for mail.viktorbarzin.me, not the k8s service name
    "custom.php" = <<-EOF
    <?php
    $config['imap_conn_options'] = [
      'ssl' => [
        'verify_peer' => false,
        'verify_peer_name' => false,
      ],
    ];
    $config['smtp_conn_options'] = [
      'ssl' => [
        'verify_peer' => false,
        'verify_peer_name' => false,
      ],
    ];
    ?>
    EOF
  }
}
resource "kubernetes_persistent_volume_claim" "roundcube_html_proxmox" {
@ -122,15 +132,14 @@ resource "kubernetes_deployment" "roundcubemail" {
container {
name = "roundcube"
image = "roundcube/roundcubemail:1.6.13-apache"
# Uncomment me to mount additional settings
# volume_mount {
# name = "imap-config"
# mount_path = "/var/roundcube/config/imap.php"
# sub_path = "imap.php"
# }
volume_mount {
name = "roundcube-config"
mount_path = "/var/roundcube/config/custom.php"
sub_path = "custom.php"
}
env {
name = "ROUNDCUBEMAIL_DEFAULT_HOST"
value = "ssl://mail.viktorbarzin.me" # tls cert must be valid!
value = "ssl://mailserver" # internal k8s service name
}
env {
name = "ROUNDCUBEMAIL_DEFAULT_PORT"
@ -138,7 +147,7 @@ resource "kubernetes_deployment" "roundcubemail" {
}
env {
name = "ROUNDCUBEMAIL_SMTP_SERVER"
value = "tls://mail.viktorbarzin.me" # tls cert must be valid!
value = "tls://mailserver" # internal k8s service name
}
env {
@ -210,12 +219,12 @@ resource "kubernetes_deployment" "roundcubemail" {
}
}
# volume {
# name = "imap-config"
# config_map {
# name = "roundcubemail.config"
# }
# }
volume {
name = "roundcube-config"
config_map {
name = kubernetes_config_map.roundcubemail_config.metadata[0].name
}
}
volume {
name = "html"

View file

@ -115,6 +115,15 @@ resource "kubernetes_deployment" "matrix" {
name = "extra-packages"
mount_path = "/extra-packages"
}
resources {
requests = {
cpu = "25m"
memory = "256Mi"
}
limits = {
memory = "512Mi"
}
}
}
volume {
name = "data"

View file

@ -172,10 +172,10 @@ resource "kubernetes_deployment" "navidrome" {
resources {
requests = {
cpu = "15m"
memory = "128Mi"
memory = "256Mi"
}
limits = {
memory = "128Mi"
memory = "384Mi"
}
}
}

View file

@ -156,11 +156,11 @@ resource "kubernetes_deployment" "plotting-book" {
}
resources {
requests = {
memory = "64Mi"
memory = "128Mi"
cpu = "10m"
}
limits = {
memory = "64Mi"
memory = "256Mi"
}
}
}

View file

@ -109,9 +109,9 @@ resource "helm_release" "redis" {
enabled = false
}
# Use the existing service name so clients don't need changes
# Sentinel-enabled Bitnami chart creates a headless service
# and a regular service pointing at the master
# Disable the Helm chart's ClusterIP service we manage our own
# that points to HAProxy (master-only routing). The headless service
# is still needed for StatefulSet pod DNS resolution.
nameOverride = "redis"
})]
}
@ -209,10 +209,10 @@ resource "kubernetes_deployment" "haproxy" {
resources {
requests = {
cpu = "10m"
memory = "16Mi"
memory = "32Mi"
}
limits = {
memory = "16Mi"
memory = "64Mi"
}
}
liveness_probe {
@ -236,28 +236,24 @@ resource "kubernetes_deployment" "haproxy" {
depends_on = [helm_release.redis]
}
resource "kubernetes_service" "redis" {
metadata {
name = "redis"
namespace = kubernetes_namespace.redis.metadata[0].name
}
spec {
selector = {
app = "redis-haproxy"
}
port {
name = "tcp-redis"
port = 6379
target_port = 6379
}
port {
name = "tcp-sentinel"
port = 26379
target_port = 26379
}
# The Helm chart creates a `redis` Service that selects all nodes (master + replica),
# causing READONLY errors when clients hit the replica. We patch it post-Helm to
# route through HAProxy instead, which health-checks and routes only to the master.
# This runs on every apply to ensure the Helm chart's service is always corrected.
resource "null_resource" "patch_redis_service" {
  # timestamp() changes on every plan, so the provisioner re-runs on each apply
  # even if Helm has since recreated or reset the service.
  triggers = {
    always = timestamp()
  }

  provisioner "local-exec" {
    command = <<-EOT
      kubectl --kubeconfig=${abspath("${path.module}/../../../../config")} \
        patch svc redis -n redis --type='json' \
        -p='[{"op":"replace","path":"/spec/selector","value":{"app":"redis-haproxy"}}]'
    EOT
  }

  # NOTE: depends_on is a meta-argument and may appear only once per block;
  # the previous duplicate declaration is merged into this single list.
  depends_on = [helm_release.redis, kubernetes_deployment.haproxy]
}
module "nfs_backup" {

View file

@ -97,8 +97,7 @@ resource "kubernetes_deployment" "tandoor" {
}
}
spec {
# Disabled: reduce cluster memory pressure (2026-03-14 OOM incident)
replicas = 0
replicas = 1
strategy {
type = "Recreate"
}
@ -212,10 +211,10 @@ resource "kubernetes_deployment" "tandoor" {
resources {
requests = {
cpu = "25m"
memory = "256Mi"
memory = "384Mi"
}
limits = {
memory = "256Mi"
memory = "512Mi"
}
}
}

View file

@ -231,8 +231,7 @@ resource "kubernetes_deployment" "trading-bot-frontend" {
}
}
spec {
# Disabled: reduce cluster memory pressure (2026-03-14 OOM incident)
replicas = 0
replicas = 1
strategy {
type = "RollingUpdate"
rolling_update {
@ -335,8 +334,7 @@ resource "kubernetes_deployment" "trading-bot-workers" {
}
}
spec {
# Disabled: reduce cluster memory pressure (2026-03-14 OOM incident)
replicas = 0
replicas = 1
strategy {
type = "Recreate"
}
@ -384,10 +382,10 @@ resource "kubernetes_deployment" "trading-bot-workers" {
resources {
requests = {
cpu = "10m"
memory = "64Mi"
memory = "128Mi"
}
limits = {
memory = "64Mi"
memory = "256Mi"
}
}
}
@ -456,10 +454,10 @@ resource "kubernetes_deployment" "trading-bot-workers" {
resources {
requests = {
cpu = "10m"
memory = "64Mi"
memory = "128Mi"
}
limits = {
memory = "64Mi"
memory = "256Mi"
}
}
}
@ -492,10 +490,10 @@ resource "kubernetes_deployment" "trading-bot-workers" {
resources {
requests = {
cpu = "10m"
memory = "64Mi"
memory = "128Mi"
}
limits = {
memory = "64Mi"
memory = "256Mi"
}
}
}
@ -528,10 +526,10 @@ resource "kubernetes_deployment" "trading-bot-workers" {
resources {
requests = {
cpu = "10m"
memory = "64Mi"
memory = "128Mi"
}
limits = {
memory = "64Mi"
memory = "256Mi"
}
}
}
@ -564,10 +562,10 @@ resource "kubernetes_deployment" "trading-bot-workers" {
resources {
requests = {
cpu = "10m"
memory = "64Mi"
memory = "128Mi"
}
limits = {
memory = "64Mi"
memory = "256Mi"
}
}
}