[ci skip] openclaw: fix slow startup — proper resources + readiness probe + VPA off

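- Set explicit CPU (2 cores) and memory (2Gi) limits, and set requests
  to 100m CPU / 512Mi memory (memory request was previously 64Mi)
  Root cause: Goldilocks VPA was throttling the container to 300m CPU,
  causing the gateway to take 5+ minutes to start, and the previous 1Gi
  memory limit caused OOM crashes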
- Add TCP readiness probe on port 18789 (30s initial delay, 10s period)
  to prevent 502 Bad Gateway during startup (Traefik was routing traffic
  before the gateway was listening)
- Disable Goldilocks VPA updates for the namespace via the label
  goldilocks.fairwinds.com/vpa-update-mode: "off"
This commit is contained in:
Viktor Barzin 2026-03-01 14:44:22 +00:00
parent b1a3685f50
commit 0da6f90ad2

View file

@ -13,7 +13,8 @@ resource "kubernetes_namespace" "openclaw" {
metadata {
name = "openclaw"
labels = {
tier = local.tiers.aux
tier = local.tiers.aux
"goldilocks.fairwinds.com/vpa-update-mode" = "off"
}
}
}
@ -358,6 +359,13 @@ resource "kubernetes_deployment" "openclaw" {
port {
container_port = 18789
}
readiness_probe {
tcp_socket {
port = 18789
}
initial_delay_seconds = 30
period_seconds = 10
}
env {
name = "OPENCLAW_GATEWAY_TOKEN"
value = random_password.gateway_token.result
@ -432,10 +440,12 @@ resource "kubernetes_deployment" "openclaw" {
}
resources {
limits = {
memory = "1Gi"
cpu = "2"
memory = "2Gi"
}
requests = {
memory = "64Mi"
cpu = "100m"
memory = "512Mi"
}
}
}