fix OOM kills: tune MySQL memory, reduce Nextcloud workers, increase Uptime Kuma limit

MySQL (3 OOM kills):
- Cap group_replication_message_cache_size to 128MB (default 1GB caused OOM)
- Reduce innodb_log_buffer_size from 64MB to 16MB
- Lower max_connections from 151 to 80 (peak usage ~40)
- Increase memory limit from 3Gi to 4Gi for headroom

Nextcloud (30+ apache2 OOM kills per incident):
- Reduce MaxRequestWorkers from 50 to 10 to prevent fork bomb
  when SQLite locks cause request pileup
- Lower StartServers (5 to 3), MinSpareServers (3 to 2) and MaxSpareServers (10 to 5)
- Lower MaxConnectionsPerChild from 200 to 100

Uptime Kuma (Node.js memory leak):
- Increase memory limit from 256Mi to 512Mi
- Increase CPU limit from 200m to 500m

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-03-12 07:26:08 +00:00
parent f2c7444159
commit 81bfccaefc
3 changed files with 25 additions and 19 deletions

View file

@ -1,9 +1,9 @@
variable "tls_secret_name" { variable "tls_secret_name" {
type = string type = string
sensitive = true sensitive = true
} }
variable "nextcloud_db_password" { variable "nextcloud_db_password" {
type = string type = string
sensitive = true sensitive = true
} }
variable "nfs_server" { type = string } variable "nfs_server" { type = string }
@ -93,15 +93,15 @@ resource "kubernetes_config_map" "apache_tuning" {
} }
data = { data = {
"mpm_prefork.conf" = <<-EOF "mpm_prefork.conf" = <<-EOF
# Tuned for container with 6Gi memory limit # Tuned for container with 6Gi memory limit and SQLite backend
# Each worker uses ~220MB RSS, so 50 workers = 11GB (shared pages reduce actual) # Each worker uses ~100-200MB RSS. 10 workers = ~2GB max
# Need enough workers so probes can get through during SQLite locks # Low count prevents fork bomb when SQLite locks cause request pileup
<IfModule mpm_prefork_module> <IfModule mpm_prefork_module>
StartServers 5 StartServers 3
MinSpareServers 3 MinSpareServers 2
MaxSpareServers 10 MaxSpareServers 5
MaxRequestWorkers 50 MaxRequestWorkers 10
MaxConnectionsPerChild 200 MaxConnectionsPerChild 100
</IfModule> </IfModule>
EOF EOF
} }
@ -223,12 +223,12 @@ module "ingress" {
port = 8080 port = 8080
rybbit_site_id = "5a3bfe59a3fe" rybbit_site_id = "5a3bfe59a3fe"
extra_annotations = { extra_annotations = {
"gethomepage.dev/enabled" = "true" "gethomepage.dev/enabled" = "true"
"gethomepage.dev/name" = "Nextcloud" "gethomepage.dev/name" = "Nextcloud"
"gethomepage.dev/description" = "Cloud productivity suite" "gethomepage.dev/description" = "Cloud productivity suite"
"gethomepage.dev/icon" = "nextcloud.png" "gethomepage.dev/icon" = "nextcloud.png"
"gethomepage.dev/group" = "Productivity" "gethomepage.dev/group" = "Productivity"
"gethomepage.dev/pod-selector" = "" "gethomepage.dev/pod-selector" = ""
"gethomepage.dev/widget.type" = "nextcloud" "gethomepage.dev/widget.type" = "nextcloud"
"gethomepage.dev/widget.url" = "https://nextcloud.viktorbarzin.me" "gethomepage.dev/widget.url" = "https://nextcloud.viktorbarzin.me"
"gethomepage.dev/widget.username" = var.homepage_credentials["nextcloud"]["username"] "gethomepage.dev/widget.username" = var.homepage_credentials["nextcloud"]["username"]

View file

@ -171,6 +171,12 @@ resource "helm_release" "mysql_cluster" {
group_replication_member_expel_timeout=30 group_replication_member_expel_timeout=30
group_replication_unreachable_majority_timeout=60 group_replication_unreachable_majority_timeout=60
group_replication_start_on_boot=ON group_replication_start_on_boot=ON
# Cap XCom cache to prevent unbounded growth (default 1GB causes OOM)
group_replication_message_cache_size=134217728
# Reduce log buffer (16MB sufficient for this workload, was 64MB)
innodb_log_buffer_size=16777216
# Limit connections (peak usage ~40, no need for 151)
max_connections=80
EOT EOT
} }
@ -181,7 +187,7 @@ resource "helm_release" "mysql_cluster" {
} }
limits = { limits = {
cpu = "2" cpu = "2"
memory = "3Gi" memory = "4Gi"
} }
} }

View file

@ -71,8 +71,8 @@ resource "kubernetes_deployment" "uptime-kuma" {
memory = "64Mi" memory = "64Mi"
} }
limits = { limits = {
cpu = "200m" cpu = "500m"
memory = "256Mi" memory = "512Mi"
} }
} }