fire-planner: LLM_MODEL env var → qwen3vl-4b default (fits in current GPU headroom; immich-ml is holding ~10GB)
This commit is contained in:
parent
fb1e47a20a
commit
5c77482a8c
1 changed file with 10 additions and 0 deletions
|
|
@@ -633,6 +633,12 @@ variable "claude_agent_service_url" {
|
|||
default = "http://claude-agent-service.claude-agent.svc.cluster.local:8080/v1/chat/completions"
|
||||
}
|
||||
|
||||
variable "examples_llm_model" {
|
||||
type = string
|
||||
description = "llama-swap model id for the examples LLM primary extractor. Use qwen3-8b when GPU has ≥5GB free; qwen3vl-4b when immich-ml is using ~10GB."
|
||||
default = "qwen3vl-4b"
|
||||
}
|
||||
|
||||
variable "run_examples_bulk_ingest" {
|
||||
type = bool
|
||||
description = "Flip to true once to bulk-populate fire_example. Reset to false after."
|
||||
|
|
@@ -796,6 +802,10 @@ resource "kubernetes_job_v1" "examples_bulk_ingest" {
|
|||
name = "CLAUDE_AGENT_SERVICE_URL"
|
||||
value = var.claude_agent_service_url
|
||||
}
|
||||
env {
|
||||
name = "LLM_MODEL"
|
||||
value = var.examples_llm_model
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue