immich: clip-keepalive CronJob to pin smart-search model warm
MACHINE_LEARNING_MODEL_TTL=600 is a single global knob, so it unloads the CLIP textual (smart-search) encoder after idle exactly like OCR/face — immich has no per-model pin. This CronJob pings the textual encoder every 5 min (< the 600s TTL) via immich-ml /predict, so a search query never pays the ~1.5s cold-load, while idle OCR/face still free their VRAM on the shared T4. Textual-only (search = text->embedding->pgvector); the visual encoder is import-time and left to unload. curl baked into the image (no runtime install). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
f0948493b3
commit
01ea7d6fa1
2 changed files with 59 additions and 1 deletions
|
|
@ -795,6 +795,64 @@ resource "kubernetes_service" "immich-machine-learning" {
|
|||
}
|
||||
}
|
||||
|
||||
# Keeps the CLIP *textual* (smart-search) model resident on the shared T4.
|
||||
# MACHINE_LEARNING_MODEL_TTL=600 is a single GLOBAL knob — without traffic it
|
||||
# unloads CLIP after 600s idle exactly like OCR/face (immich has no per-model
|
||||
# pin). This job pings the textual encoder every 5 min (< the 600s TTL) so a
|
||||
# search query never pays the cold-load, while idle OCR/face still free their
|
||||
# VRAM. Textual only: smart search is text->embedding->pgvector; the visual
|
||||
# encoder is import-time and is intentionally left to unload. The modelName
|
||||
# MUST match MACHINE_LEARNING_PRELOAD__CLIP__TEXTUAL on the deployment above.
|
||||
resource "kubernetes_cron_job_v1" "clip-keepalive" {
  metadata {
    name      = "clip-keepalive"
    namespace = kubernetes_namespace.immich.metadata[0].name
  }

  spec {
    # Every 5 minutes, i.e. 300s between pings, which stays under the 600s
    # model TTL described in the comment above this resource.
    schedule = "*/5 * * * *"

    # Never overlap runs: a slow ping must not pile up behind the next tick.
    concurrency_policy = "Forbid"

    # Skip a tick entirely if it cannot start within 60s of its scheduled time.
    starting_deadline_seconds = 60

    failed_jobs_history_limit     = 3
    successful_jobs_history_limit = 1

    job_template {
      metadata {}

      spec {
        # One retry. Two attempts at the 30s curl timeout below still fit
        # inside the 60s active deadline.
        backoff_limit           = 1
        active_deadline_seconds = 60

        # Finished Jobs (successful or failed) are garbage-collected after
        # 120s, so pod logs of a failed run are only available briefly.
        ttl_seconds_after_finished = 120

        template {
          metadata {}

          spec {
            restart_policy = "Never"

            container {
              name = "warmup"

              # curl baked into the image — never apt/apk/pip install at
              # runtime in a CronJob (writes to the node container layer on
              # every run; see status-page-pusher disk-write incident).
              image = "docker.io/curlimages/curl:8.11.1"

              # exec form (no shell), so every list element reaches curl
              # verbatim as a single argument.
              command = [
                "curl",
                # --silent hides the progress meter; --show-error keeps curl's
                # own error line so a failed pod has something in its log.
                "--silent", "--show-error",
                # Non-zero exit on HTTP >= 400 so the Job is marked failed.
                "--fail",
                "--max-time", "30",
                # The response body is never used; discard it instead of
                # writing it to the container log on every run.
                "--output", "/dev/null",
                # --form-string sends the value literally (no @file, <file or
                # ;type= interpretation). jsonencode renders exactly
                # {"clip":{"textual":{"modelName":"ViT-B-16-SigLIP2__webli"}}}.
                "--form-string", "entries=${jsonencode({
                  clip = {
                    textual = {
                      modelName = "ViT-B-16-SigLIP2__webli"
                    }
                  }
                })}",
                "--form-string", "text=keepalive",
                "http://immich-machine-learning:3003/predict",
              ]

              resources {
                requests = { cpu = "10m", memory = "16Mi" }
                limits   = { memory = "32Mi" }
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
|
||||
|
||||
module "ingress-immich" {
|
||||
source = "../../modules/kubernetes/ingress_factory"
|
||||
# auth = "app": Immich has its own user auth + bearer-token API. Authentik
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue