examples: LLM_MODEL env var (default qwen3-8b; swap to qwen3vl-4b in K8s)
This commit is contained in:
parent
9b32247fea
commit
536f432a46
1 changed file with 6 additions and 1 deletion
|
|
@@ -11,6 +11,7 @@ from __future__ import annotations
|
|||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any
|
||||
|
||||
|
|
@@ -21,7 +22,11 @@ from fire_planner.examples.models import ExtractedExample, RawPost
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
QWEN_MODEL = "qwen3-8b"
|
||||
# `LLM_MODEL` lets the deployment swap to a smaller model when the GPU is
|
||||
# contested. Default stays on qwen3-8b for local dev / tests. The "qwen" name
|
||||
# in the constant is historical — the value can be any llama-swap model id
|
||||
# (e.g. `qwen3vl-4b` when k8s-node1's VRAM is mostly held by immich-ml).
|
||||
QWEN_MODEL = os.environ.get("LLM_MODEL", "qwen3-8b")
|
||||
CLAUDE_AGENT_MODEL = "claude-haiku-4-5"
|
||||
HTTP_TIMEOUT = httpx.Timeout(60.0)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue