From 5c77482a8c165c2cae1b1628193c824e5884af6e Mon Sep 17 00:00:00 2001
From: Viktor Barzin
Date: Mon, 1 Jun 2026 19:07:55 +0000
Subject: [PATCH] =?UTF-8?q?fire-planner:=20LLM=5FMODEL=20env=20var=20?=
 =?UTF-8?q?=E2=86=92=20qwen3vl-4b=20default=20(fits=20in=20current=20GPU?=
 =?UTF-8?q?=20headroom;=20immich-ml=20is=20holding=20~10GB)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (text between the "---" line and the diff is not part of the
commit message):

* Hunk 1 adds a string variable "examples_llm_model" with default
  "qwen3vl-4b", placed between the existing "claude_agent_service_url" and
  "run_examples_bulk_ingest" variables.
* Hunk 2 adds an env block named "LLM_MODEL" whose value is
  var.examples_llm_model, directly after the existing
  "CLAUDE_AGENT_SERVICE_URL" env block; the hunk header places it under
  resource "kubernetes_job_v1" "examples_bulk_ingest".
* NOTE(review): how the job's program consumes LLM_MODEL is not visible in
  this patch -- confirm the variable name matches what the code reads.
* NOTE(review): this copy of the patch was re-flowed from a single-line
  paste. Leading indentation and "=" alignment of the context and added
  lines are reconstructed, not original -- verify against
  stacks/fire-planner/main.tf before applying, or apply with
  --ignore-whitespace.

 stacks/fire-planner/main.tf | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/stacks/fire-planner/main.tf b/stacks/fire-planner/main.tf
index fa3fe4f7..5c6b934a 100644
--- a/stacks/fire-planner/main.tf
+++ b/stacks/fire-planner/main.tf
@@ -633,6 +633,12 @@ variable "claude_agent_service_url" {
   default = "http://claude-agent-service.claude-agent.svc.cluster.local:8080/v1/chat/completions"
 }
 
+variable "examples_llm_model" {
+  type = string
+  description = "llama-swap model id for the examples LLM primary extractor. Use qwen3-8b when GPU has ≥5GB free; qwen3vl-4b when immich-ml is using ~10GB."
+  default = "qwen3vl-4b"
+}
+
 variable "run_examples_bulk_ingest" {
   type = bool
   description = "Flip to true once to bulk-populate fire_example. Reset to false after."
@@ -796,6 +802,10 @@ resource "kubernetes_job_v1" "examples_bulk_ingest" {
             name = "CLAUDE_AGENT_SERVICE_URL"
             value = var.claude_agent_service_url
           }
+          env {
+            name = "LLM_MODEL"
+            value = var.examples_llm_model
+          }
         }
       }
     }