Disable qwen3 "thinking" output for the qwen extraction request.

_call_openai_chat gains an optional extra_body mapping. When it is non-empty
it is merged into the JSON request body with dict.update, so its keys take
precedence over the default body keys. The call shown in extract_with_qwen
passes chat_template_kwargs.enable_thinking=False through that parameter.

Two tests capture the outgoing HTTP request: the qwen request must carry the
flag, and the claude request must not contain chat_template_kwargs at all.

NOTE(review): this patch was restored from a copy whose newlines had been
collapsed. Indentation of context lines was reconstructed from Python syntax;
confirm it against the target files before applying. The patch uses typing.Any
in llm_extract.py without adding an import; presumably the module already
imports it, verify.

diff --git a/fire_planner/examples/llm_extract.py b/fire_planner/examples/llm_extract.py
index 22c51cf..fd0d07b 100644
--- a/fire_planner/examples/llm_extract.py
+++ b/fire_planner/examples/llm_extract.py
@@ -62,6 +62,8 @@ async def extract_with_qwen(
         post=post,
         client=client,
         record_model=QWEN_MODEL,
+        # Suppress qwen3's chain-of-thought prefix so the response is bare JSON.
+        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
     )
 
 
@@ -73,8 +75,9 @@ async def _call_openai_chat(
     client: httpx.AsyncClient,
     record_model: str,
     extra_headers: dict[str, str] | None = None,
+    extra_body: dict[str, Any] | None = None,
 ) -> ExtractedExample | None:
-    body = {
+    body: dict[str, Any] = {
         "model": model_name,
         "messages": [
             {"role": "system", "content": PROMPT_SYSTEM},
@@ -83,6 +86,8 @@ async def _call_openai_chat(
         "temperature": 0.0,
         "max_tokens": 512,
     }
+    if extra_body:
+        body.update(extra_body)
     try:
         resp = await client.post(
             url,
diff --git a/tests/test_examples_llm_extract.py b/tests/test_examples_llm_extract.py
index ab9026c..cb11d8c 100644
--- a/tests/test_examples_llm_extract.py
+++ b/tests/test_examples_llm_extract.py
@@ -174,6 +174,55 @@ async def test_fallback_keeps_high_confidence_qwen_result() -> None:
     assert claude_route.called is False  # high-confidence qwen → claude not hit
 
 
+@respx.mock
+@pytest.mark.asyncio
+async def test_qwen_request_disables_thinking() -> None:
+    """qwen call must include chat_template_kwargs.enable_thinking=False."""
+    captured: list[dict] = []
+
+    def capture(request: httpx.Request) -> httpx.Response:
+        captured.append(json.loads(request.content))
+        payload = {"country": "UK", "confidence": 0.9}
+        return httpx.Response(
+            200,
+            json={"choices": [{"message": {"content": json.dumps(payload)}}]},
+        )
+
+    respx.post(LLAMA_URL).mock(side_effect=capture)
+
+    async with httpx.AsyncClient() as client:
+        await extract_with_qwen(_post(), llama_url=LLAMA_URL, client=client)
+
+    assert captured, "No request made"
+    body = captured[0]
+    assert body.get("chat_template_kwargs", {}).get("enable_thinking") is False
+
+
+@respx.mock
+@pytest.mark.asyncio
+async def test_claude_request_omits_thinking_kwarg() -> None:
+    """Claude call must NOT send chat_template_kwargs — it uses a different API."""
+    from fire_planner.examples.llm_extract import extract_with_claude
+
+    captured: list[dict] = []
+
+    def capture(request: httpx.Request) -> httpx.Response:
+        captured.append(json.loads(request.content))
+        payload = {"country": "UK", "confidence": 0.9}
+        return httpx.Response(
+            200,
+            json={"choices": [{"message": {"content": json.dumps(payload)}}]},
+        )
+
+    respx.post(CLAUDE_URL).mock(side_effect=capture)
+
+    async with httpx.AsyncClient() as client:
+        await extract_with_claude(_post(), claude_url=CLAUDE_URL, bearer="t", client=client)
+
+    assert captured, "No request made"
+    assert "chat_template_kwargs" not in captured[0]
+
+
 def test_to_gbp_converts_usd() -> None:
     rates = {"GBP": Decimal("1"), "USD": Decimal("0.80")}
     assert to_gbp(Decimal("100"), "USD", rates) == Decimal("80.00")