From fcbb5971f310a49156318e35cf5d832ae5756d46 Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Sun, 14 Dec 2025 09:47:36 +0000
Subject: [PATCH] move immich ml into a separate deployment and ask it to use
 gpu [ci skip]

---
 modules/kubernetes/immich/chart_values.tpl |   5 +-
 modules/kubernetes/immich/main.tf          | 101 ++++++++++++++++++++-
 2 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/modules/kubernetes/immich/chart_values.tpl b/modules/kubernetes/immich/chart_values.tpl
index abfd7175..882cada4 100644
--- a/modules/kubernetes/immich/chart_values.tpl
+++ b/modules/kubernetes/immich/chart_values.tpl
@@ -29,7 +29,7 @@ env:
   # IMMICH_MACHINE_LEARNING_URL: "http://immich-machine-learning.immich.svc.cluster.local:3003"
 
 image:
-  tag: v2.3.1
+  tag: ${version}
 
 immich:
   persistence:
@@ -55,7 +55,8 @@ server:
 
 # increase liveliness and readiness checks to allow enough time for downloading models
 machine-learning:
-  enabled: true
+  # enabled: true
+  enabled: false
   image:
     repository: ghcr.io/immich-app/immich-machine-learning
     pullPolicy: IfNotPresent
diff --git a/modules/kubernetes/immich/main.tf b/modules/kubernetes/immich/main.tf
index 985ea2c2..067c4479 100644
--- a/modules/kubernetes/immich/main.tf
+++ b/modules/kubernetes/immich/main.tf
@@ -1,6 +1,12 @@
 variable "tls_secret_name" {}
 variable "postgresql_password" {}
 variable "homepage_token" {}
+variable "immich_version" {
+  type = string
+  # Change me to upgrade
+  default = "v2.3.1"
+}
+
 
 module "tls_secret" {
   source          = "../setup_tls_secret"
@@ -181,7 +187,100 @@ resource "helm_release" "immich" {
   version    = "0.9.3"
   timeout    = 6000
 
-  values = [templatefile("${path.module}/chart_values.tpl", { postgresql_password = var.postgresql_password })]
+  values = [templatefile("${path.module}/chart_values.tpl", { postgresql_password = var.postgresql_password, version = var.immich_version })]
+}
+
+# The Helm chart's machine-learning deployment cannot be customized with affinity settings to target the GPU node, so it is defined here instead
+resource "kubernetes_deployment" "immich-machine-learning" {
+  metadata {
+    name      = "immich-machine-learning"
+    namespace = "immich"
+  }
+  spec {
+    replicas = 1
+    selector {
+      match_labels = {
+        app = "immich-machine-learning"
+      }
+    }
+    strategy {
+      type = "RollingUpdate"
+    }
+    template {
+      metadata {
+        labels = {
+          app = "immich-machine-learning"
+        }
+      }
+      spec {
+        node_selector = {
+          "gpu" : "true"
+        }
+        container {
+          # image = "ghcr.io/immich-app/immich-machine-learning:${var.immich_version}-cuda"
+          image = "ghcr.io/immich-app/immich-machine-learning:${var.immich_version}"
+          name  = "immich-machine-learning"
+          port {
+            container_port = 3003
+            protocol       = "TCP"
+            name           = "immich-ml"
+          }
+          env {
+            name  = "TRANSFORMERS_CACHE"
+            value = "/cache"
+          }
+          env {
+            name  = "HF_XET_CACHE"
+            value = "/cache/huggingface-xet"
+          }
+          env {
+            name  = "MPLCONFIGDIR"
+            value = "/cache/matplotlib-config"
+          }
+          env {
+            name  = "MACHINE_LEARNING_PRELOAD__CLIP"
+            value = "ViT-B-16-SigLIP2__webli"
+          }
+
+          volume_mount {
+            name       = "cache"
+            mount_path = "/cache"
+          }
+          resources {
+            limits = {
+              "nvidia.com/gpu" = "1" # Used for inference
+            }
+          }
+        }
+        volume {
+          name = "cache"
+          nfs {
+            path   = "/mnt/main/immich/machine-learning"
+            server = "10.0.10.15"
+          }
+        }
+      }
+    }
+  }
+}
+
+resource "kubernetes_service" "immich-machine-learning" {
+  metadata {
+    name      = "immich-machine-learning"
+    namespace = "immich"
+    labels = {
+      "app" = "immich-machine-learning"
+    }
+  }
+
+  spec {
+    selector = {
+      app = "immich-machine-learning"
+    }
+    port {
+      port = 3003
+    }
+  }
 }
 
 resource "kubernetes_ingress_v1" "ingress" {