fix immich TF drift from Kyverno ndots injection, right-size nvidia GPU operator
- immich: add lifecycle ignore_changes for dns_config on all 3 deployments to prevent perpetual plan drift from the Kyverno ndots:2 mutation policy
- nvidia dcgm-exporter: 768Mi → 2560Mi (VPA upper bound 2091Mi; was under-provisioned)
- nvidia cuda-validator: 1024Mi → 256Mi (one-shot job; vastly over-provisioned)
This commit is contained in:
parent
a6d281dbc6
commit
5beb481dc4
2 changed files with 36 additions and 0 deletions
|
|
@@ -114,6 +114,12 @@ resource "kubernetes_deployment" "immich_server" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
ignore_changes = [
|
||||||
|
spec[0].template[0].spec[0].dns_config,
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
spec {
|
spec {
|
||||||
replicas = 1
|
replicas = 1
|
||||||
progress_deadline_seconds = 600
|
progress_deadline_seconds = 600
|
||||||
|
|
@@ -326,6 +332,13 @@ resource "kubernetes_deployment" "immich-postgres" {
|
||||||
tier = local.tiers.gpu
|
tier = local.tiers.gpu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
ignore_changes = [
|
||||||
|
spec[0].template[0].spec[0].dns_config,
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
spec {
|
spec {
|
||||||
replicas = 1
|
replicas = 1
|
||||||
selector {
|
selector {
|
||||||
|
|
@@ -436,6 +449,13 @@ resource "kubernetes_deployment" "immich-machine-learning" {
|
||||||
tier = local.tiers.gpu
|
tier = local.tiers.gpu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
ignore_changes = [
|
||||||
|
spec[0].template[0].spec[0].dns_config,
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
spec {
|
spec {
|
||||||
replicas = 1
|
replicas = 1
|
||||||
selector {
|
selector {
|
||||||
|
|
|
||||||
|
|
@@ -18,6 +18,22 @@ driver:
|
||||||
config:
|
config:
|
||||||
name: time-slicing-config
|
name: time-slicing-config
|
||||||
|
|
||||||
|
# DCGM Exporter - increased from 768Mi to 2560Mi based on VPA upper bound of 2091Mi
|
||||||
|
dcgmExporter:
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "2560Mi"
|
||||||
|
limits:
|
||||||
|
memory: "2560Mi"
|
||||||
|
|
||||||
|
# CUDA Validator - reduced from 1024Mi to 256Mi (one-shot job)
|
||||||
|
validator:
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "256Mi"
|
||||||
|
limits:
|
||||||
|
memory: "256Mi"
|
||||||
|
|
||||||
# Tolerate GPU node taint for all GPU operator components
|
# Tolerate GPU node taint for all GPU operator components
|
||||||
daemonsets:
|
daemonsets:
|
||||||
tolerations:
|
tolerations:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue