diff --git a/fire_planner/examples/llm_extract.py b/fire_planner/examples/llm_extract.py index c6e7ca1..22c51cf 100644 --- a/fire_planner/examples/llm_extract.py +++ b/fire_planner/examples/llm_extract.py @@ -27,7 +27,7 @@ log = logging.getLogger(__name__) # in the constant is historical — the value can be any llama-swap model id # (e.g. `qwen3vl-4b` when k8s-node1's VRAM is mostly held by immich-ml). QWEN_MODEL = os.environ.get("LLM_MODEL", "qwen3-8b") -CLAUDE_AGENT_MODEL = "claude-haiku-4-5" +CLAUDE_AGENT_MODEL = "haiku" HTTP_TIMEOUT = httpx.Timeout(60.0) PROMPT_SYSTEM = ( diff --git a/tests/test_examples_llm_extract.py b/tests/test_examples_llm_extract.py index a28c4f9..ab9026c 100644 --- a/tests/test_examples_llm_extract.py +++ b/tests/test_examples_llm_extract.py @@ -110,7 +110,7 @@ async def test_fallback_escalates_when_qwen_returns_none() -> None: ) assert out is not None - assert out.llm_model == "claude-haiku-4-5" + assert out.llm_model == "haiku" assert out.country == "Philippines" @@ -140,7 +140,7 @@ async def test_fallback_escalates_on_low_confidence() -> None: assert out is not None assert out.country == "Thailand" - assert out.llm_model == "claude-haiku-4-5" + assert out.llm_model == "haiku" @respx.mock