fire-planner: LLM_MODEL env var → qwen3vl-4b default (fits in current GPU headroom; immich-ml is holding ~10GB)

2026-06-01 19:07:55 +00:00 · 2026-06-01 19:07:55 +00:00 · 5c77482a8c
commit 5c77482a8c
parent fb1e47a20a
1 changed file with 10 additions and 0 deletions
--- a/stacks/fire-planner/main.tf
+++ b/stacks/fire-planner/main.tf
@@ -633,6 +633,12 @@ variable "claude_agent_service_url" {
  default     = "http://claude-agent-service.claude-agent.svc.cluster.local:8080/v1/chat/completions"
 }

+variable "examples_llm_model" { # Set as the LLM_MODEL env var in the examples_bulk_ingest job.
+  type        = string
+  description = "llama-swap model id for the examples LLM primary extractor. Use qwen3-8b when GPU has ≥5GB free; qwen3vl-4b when immich-ml is using ~10GB."
+  default     = "qwen3vl-4b" # Override with "qwen3-8b" when the GPU has enough free memory (see description).
+}
+
 variable "run_examples_bulk_ingest" {
  type        = bool
  description = "Flip to true once to bulk-populate fire_example. Reset to false after."
@@ -796,6 +802,10 @@ resource "kubernetes_job_v1" "examples_bulk_ingest" {
            name  = "CLAUDE_AGENT_SERVICE_URL"
            value = var.claude_agent_service_url
          }
+          env { # llama-swap model id for the examples extractor; change it via var.examples_llm_model.
+            name  = "LLM_MODEL"
+            value = var.examples_llm_model
+          }
        }
      }
    }