fix cluster health: resolve 21/23 failures from healthcheck
- nvidia: change GPU taint NoSchedule -> PreferNoSchedule to allow overflow scheduling on k8s-node1 (frees ~7Gi capacity)
- kyverno: increase reports-controller memory 256Mi -> 512Mi (OOMKilled)
- speedtest: add missing DB_PORT=3306 env var (nc: service "" unknown)
- realestate-crawler: increase API memory 64Mi -> 256Mi (OOMKilled)
- calibre: increase liveness probe timeout 1s -> 5s (false restarts)
This commit is contained in:
parent
dc576aa8b6
commit
6f2f4c089c
5 changed files with 10 additions and 5 deletions
|
|
@@ -215,6 +215,7 @@ resource "kubernetes_deployment" "calibre-web-automated" {
|
||||||
path = "/"
|
path = "/"
|
||||||
port = 8083
|
port = 8083
|
||||||
}
|
}
|
||||||
|
timeout_seconds = 5
|
||||||
period_seconds = 30
|
period_seconds = 30
|
||||||
failure_threshold = 3
|
failure_threshold = 3
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -30,11 +30,11 @@ resource "helm_release" "kyverno" {
|
||||||
reportsController = {
|
reportsController = {
|
||||||
resources = {
|
resources = {
|
||||||
limits = {
|
limits = {
|
||||||
memory = "256Mi"
|
memory = "512Mi"
|
||||||
}
|
}
|
||||||
requests = {
|
requests = {
|
||||||
cpu = "100m"
|
cpu = "100m"
|
||||||
memory = "128Mi"
|
memory = "384Mi"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -37,7 +37,7 @@ resource "kubernetes_resource_quota" "nvidia_quota" {
|
||||||
resource "null_resource" "gpu_node_config" {
|
resource "null_resource" "gpu_node_config" {
|
||||||
provisioner "local-exec" {
|
provisioner "local-exec" {
|
||||||
command = <<-EOT
|
command = <<-EOT
|
||||||
kubectl taint nodes k8s-node1 nvidia.com/gpu=true:NoSchedule --overwrite
|
kubectl taint nodes k8s-node1 nvidia.com/gpu=true:PreferNoSchedule --overwrite
|
||||||
kubectl label nodes k8s-node1 gpu=true --overwrite
|
kubectl label nodes k8s-node1 gpu=true --overwrite
|
||||||
EOT
|
EOT
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -210,10 +210,10 @@ resource "kubernetes_deployment" "realestate-crawler-api" {
|
||||||
resources {
|
resources {
|
||||||
requests = {
|
requests = {
|
||||||
cpu = "15m"
|
cpu = "15m"
|
||||||
memory = "64Mi"
|
memory = "256Mi"
|
||||||
}
|
}
|
||||||
limits = {
|
limits = {
|
||||||
memory = "64Mi"
|
memory = "256Mi"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
volume_mount {
|
volume_mount {
|
||||||
|
|
|
||||||
|
|
@@ -111,6 +111,10 @@ resource "kubernetes_deployment" "speedtest" {
|
||||||
name = "DB_PASSWORD"
|
name = "DB_PASSWORD"
|
||||||
value = data.vault_kv_secret_v2.secrets.data["db_password"]
|
value = data.vault_kv_secret_v2.secrets.data["db_password"]
|
||||||
}
|
}
|
||||||
|
env {
|
||||||
|
name = "DB_PORT"
|
||||||
|
value = "3306"
|
||||||
|
}
|
||||||
env {
|
env {
|
||||||
name = "APP_TIMEZONE"
|
name = "APP_TIMEZONE"
|
||||||
value = "Europe/Sofia"
|
value = "Europe/Sofia"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue