From 5c77482a8c165c2cae1b1628193c824e5884af6e Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Mon, 1 Jun 2026 19:07:55 +0000
Subject: [PATCH] =?UTF-8?q?fire-planner:=20LLM=5FMODEL=20env=20var=20?=
 =?UTF-8?q?=E2=86=92=20qwen3vl-4b=20default=20(fits=20in=20current=20GPU?=
 =?UTF-8?q?=20headroom;=20immich-ml=20is=20holding=20~10GB)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 stacks/fire-planner/main.tf | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/stacks/fire-planner/main.tf b/stacks/fire-planner/main.tf
index fa3fe4f7..5c6b934a 100644
--- a/stacks/fire-planner/main.tf
+++ b/stacks/fire-planner/main.tf
@@ -633,6 +633,12 @@ variable "claude_agent_service_url" {
   default     = "http://claude-agent-service.claude-agent.svc.cluster.local:8080/v1/chat/completions"
 }
 
+variable "examples_llm_model" { # exposed as the LLM_MODEL env var on kubernetes_job_v1.examples_bulk_ingest
+  type        = string
+  description = "llama-swap model id for the examples LLM primary extractor. Use qwen3-8b when GPU has ≥5GB free; qwen3vl-4b when immich-ml is using ~10GB."
+  default     = "qwen3vl-4b" # chosen to fit current GPU headroom while immich-ml holds ~10GB; override with qwen3-8b once >=5GB is free
+}
+
 variable "run_examples_bulk_ingest" {
   type        = bool
   description = "Flip to true once to bulk-populate fire_example. Reset to false after."
@@ -796,6 +802,10 @@ resource "kubernetes_job_v1" "examples_bulk_ingest" {
             name  = "CLAUDE_AGENT_SERVICE_URL"
             value = var.claude_agent_service_url
           }
+          env { # model id from var.examples_llm_model (default "qwen3vl-4b"); NOTE(review): confirm the ingest code reads LLM_MODEL
+            name  = "LLM_MODEL"
+            value = var.examples_llm_model
+          }
         }
       }
     }