fix OOM kills: tune MySQL memory, reduce Nextcloud workers, increase Uptime Kuma limit
MySQL (3 OOM kills):
- Cap group_replication_message_cache_size to 128MB (default 1GB caused OOM)
- Reduce innodb_log_buffer_size from 64MB to 16MB
- Lower max_connections from 151 to 80 (peak usage ~40)
- Increase memory limit from 3Gi to 4Gi for headroom

Nextcloud (30+ apache2 OOM kills per incident):
- Reduce MaxRequestWorkers from 50 to 10 to prevent fork bomb when SQLite locks cause request pileup
- Lower StartServers/MinSpare/MaxSpare proportionally

Uptime Kuma (Node.js memory leak):
- Increase memory limit from 256Mi to 512Mi
- Increase CPU limit from 200m to 500m

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f2c7444159
commit
81bfccaefc
3 changed files with 25 additions and 19 deletions
|
|
@@ -1,9 +1,9 @@
|
|||
variable "tls_secret_name" {
|
||||
type = string
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
variable "nextcloud_db_password" {
|
||||
type = string
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
variable "nfs_server" { type = string }
|
||||
|
|
@@ -93,15 +93,15 @@ resource "kubernetes_config_map" "apache_tuning" {
|
|||
}
|
||||
data = {
|
||||
"mpm_prefork.conf" = <<-EOF
|
||||
# Tuned for container with 6Gi memory limit
|
||||
# Each worker uses ~220MB RSS, so 50 workers ≈ 11GB (shared pages reduce actual)
|
||||
# Need enough workers so probes can get through during SQLite locks
|
||||
# Tuned for container with 6Gi memory limit and SQLite backend
|
||||
# Each worker uses ~100-200MB RSS. 10 workers = ~2GB max
|
||||
# Low count prevents fork bomb when SQLite locks cause request pileup
|
||||
<IfModule mpm_prefork_module>
|
||||
StartServers 5
|
||||
MinSpareServers 3
|
||||
MaxSpareServers 10
|
||||
MaxRequestWorkers 50
|
||||
MaxConnectionsPerChild 200
|
||||
StartServers 3
|
||||
MinSpareServers 2
|
||||
MaxSpareServers 5
|
||||
MaxRequestWorkers 10
|
||||
MaxConnectionsPerChild 100
|
||||
</IfModule>
|
||||
EOF
|
||||
}
|
||||
|
|
@@ -223,12 +223,12 @@ module "ingress" {
|
|||
port = 8080
|
||||
rybbit_site_id = "5a3bfe59a3fe"
|
||||
extra_annotations = {
|
||||
"gethomepage.dev/enabled" = "true"
|
||||
"gethomepage.dev/name" = "Nextcloud"
|
||||
"gethomepage.dev/description" = "Cloud productivity suite"
|
||||
"gethomepage.dev/icon" = "nextcloud.png"
|
||||
"gethomepage.dev/group" = "Productivity"
|
||||
"gethomepage.dev/pod-selector" = ""
|
||||
"gethomepage.dev/enabled" = "true"
|
||||
"gethomepage.dev/name" = "Nextcloud"
|
||||
"gethomepage.dev/description" = "Cloud productivity suite"
|
||||
"gethomepage.dev/icon" = "nextcloud.png"
|
||||
"gethomepage.dev/group" = "Productivity"
|
||||
"gethomepage.dev/pod-selector" = ""
|
||||
"gethomepage.dev/widget.type" = "nextcloud"
|
||||
"gethomepage.dev/widget.url" = "https://nextcloud.viktorbarzin.me"
|
||||
"gethomepage.dev/widget.username" = var.homepage_credentials["nextcloud"]["username"]
|
||||
|
|
|
|||
|
|
@@ -171,6 +171,12 @@ resource "helm_release" "mysql_cluster" {
|
|||
group_replication_member_expel_timeout=30
|
||||
group_replication_unreachable_majority_timeout=60
|
||||
group_replication_start_on_boot=ON
|
||||
# Cap XCom cache to prevent unbounded growth (default 1GB causes OOM)
|
||||
group_replication_message_cache_size=134217728
|
||||
# Reduce log buffer (16MB sufficient for this workload, was 64MB)
|
||||
innodb_log_buffer_size=16777216
|
||||
# Limit connections (peak usage ~40, no need for 151)
|
||||
max_connections=80
|
||||
EOT
|
||||
}
|
||||
|
||||
|
|
@@ -181,7 +187,7 @@ resource "helm_release" "mysql_cluster" {
|
|||
}
|
||||
limits = {
|
||||
cpu = "2"
|
||||
memory = "3Gi"
|
||||
memory = "4Gi"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -71,8 +71,8 @@ resource "kubernetes_deployment" "uptime-kuma" {
|
|||
memory = "64Mi"
|
||||
}
|
||||
limits = {
|
||||
cpu = "200m"
|
||||
memory = "256Mi"
|
||||
cpu = "500m"
|
||||
memory = "512Mi"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue