diff --git a/fire_planner/examples/llm_extract.py b/fire_planner/examples/llm_extract.py
index 22c51cf..fd0d07b 100644
--- a/fire_planner/examples/llm_extract.py
+++ b/fire_planner/examples/llm_extract.py
@@ -62,6 +62,8 @@ async def extract_with_qwen(
         post=post,
         client=client,
         record_model=QWEN_MODEL,
+        # Suppress qwen3's chain-of-thought prefix so the response is bare JSON.
+        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
     )
 
 
@@ -73,8 +75,9 @@ async def _call_openai_chat(
     client: httpx.AsyncClient,
     record_model: str,
     extra_headers: dict[str, str] | None = None,
+    extra_body: dict[str, Any] | None = None,
 ) -> ExtractedExample | None:
-    body = {
+    body: dict[str, Any] = {
         "model": model_name,
         "messages": [
             {"role": "system", "content": PROMPT_SYSTEM},
@@ -83,6 +86,8 @@ async def _call_openai_chat(
         "temperature": 0.0,
         "max_tokens": 512,
     }
+    if extra_body:
+        body.update(extra_body)
     try:
         resp = await client.post(
             url,
diff --git a/tests/test_examples_llm_extract.py b/tests/test_examples_llm_extract.py
index ab9026c..cb11d8c 100644
--- a/tests/test_examples_llm_extract.py
+++ b/tests/test_examples_llm_extract.py
@@ -174,6 +174,55 @@ async def test_fallback_keeps_high_confidence_qwen_result() -> None:
     assert claude_route.called is False  # high-confidence qwen → claude not hit
 
 
+@respx.mock
+@pytest.mark.asyncio
+async def test_qwen_request_disables_thinking() -> None:
+    """The qwen request body must carry chat_template_kwargs.enable_thinking=False."""
+    captured: list[dict] = []  # decoded JSON bodies of intercepted requests
+
+    def capture(request: httpx.Request) -> httpx.Response:
+        captured.append(json.loads(request.content))  # record what was sent
+        payload = {"country": "UK", "confidence": 0.9}  # sent back as message content
+        return httpx.Response(
+            200,
+            json={"choices": [{"message": {"content": json.dumps(payload)}}]},
+        )
+
+    respx.post(LLAMA_URL).mock(side_effect=capture)  # mocked route records each POST
+
+    async with httpx.AsyncClient() as client:
+        await extract_with_qwen(_post(), llama_url=LLAMA_URL, client=client)
+
+    assert captured, "No request made"
+    body = captured[0]  # a missing key yields None below, which also fails `is False`
+    assert body.get("chat_template_kwargs", {}).get("enable_thinking") is False
+
+
+@respx.mock
+@pytest.mark.asyncio
+async def test_claude_request_omits_thinking_kwarg() -> None:
+    """Claude request body must NOT include the qwen-only chat_template_kwargs key."""
+    from fire_planner.examples.llm_extract import extract_with_claude
+
+    captured: list[dict] = []  # decoded JSON bodies of intercepted requests
+
+    def capture(request: httpx.Request) -> httpx.Response:
+        captured.append(json.loads(request.content))  # record what was sent
+        payload = {"country": "UK", "confidence": 0.9}  # sent back as message content
+        return httpx.Response(
+            200,
+            json={"choices": [{"message": {"content": json.dumps(payload)}}]},
+        )
+
+    respx.post(CLAUDE_URL).mock(side_effect=capture)  # mocked route records each POST
+
+    async with httpx.AsyncClient() as client:
+        await extract_with_claude(_post(), claude_url=CLAUDE_URL, bearer="t", client=client)
+
+    assert captured, "No request made"
+    assert "chat_template_kwargs" not in captured[0]  # top-level keys of the first body
+
+
 def test_to_gbp_converts_usd() -> None:
     rates = {"GBP": Decimal("1"), "USD": Decimal("0.80")}
     assert to_gbp(Decimal("100"), "USD", rates) == Decimal("80.00")