[ci skip] openclaw: switch to free agentic models via NVIDIA NIM, OpenRouter, Llama API
- Primary: Mistral Large 3 (675B) on NIM — always warm, excellent tool calling
- Fallback 1: Nemotron Ultra 253B on NIM
- Fallback 2: Llama 4 Maverick on Llama API (different provider for resilience)
- 10 models total across 3 providers, all free
- Removed: Modal (GLM-5), Gemini, Ollama providers
- Added: NVIDIA NIM provider with DeepSeek V3.2, Qwen 3.5, Qwen 3 Coder, GLM-5
- Bumped maxTokens from 8192 to 16384 for agentic output room
This commit is contained in:
parent
99ecba46db
commit
014f6cad5a
1 changed file with 30 additions and 34 deletions
|
|
@ -1,12 +1,11 @@
|
|||
variable "tls_secret_name" { type = string }
|
||||
variable "openclaw_ssh_key" { type = string }
|
||||
variable "openclaw_skill_secrets" { type = map(string) }
|
||||
variable "gemini_api_key" { type = string }
|
||||
variable "llama_api_key" { type = string }
|
||||
variable "brave_api_key" { type = string }
|
||||
variable "modal_api_key" { type = string }
|
||||
variable "openrouter_api_key" { type = string }
|
||||
variable "nvidia_api_key" { type = string }
|
||||
variable "nfs_server" { type = string }
|
||||
variable "ollama_host" { type = string }
|
||||
|
||||
|
||||
resource "kubernetes_namespace" "openclaw" {
|
||||
|
|
@ -87,14 +86,20 @@ resource "kubernetes_config_map" "openclaw_config" {
|
|||
contextTokens = 1000000
|
||||
bootstrapMaxChars = 30000
|
||||
model = {
|
||||
primary = "modal/zai-org/GLM-5-FP8"
|
||||
fallbacks = ["gemini/gemini-2.5-flash", "llama-as-openai/Llama-3.3-70B-Instruct"]
|
||||
primary = "nim/mistralai/mistral-large-3-675b-instruct-2512"
|
||||
fallbacks = ["nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", "llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8"]
|
||||
}
|
||||
models = {
|
||||
"modal/zai-org/GLM-5-FP8" = { streaming = false }
|
||||
"gemini/gemini-2.5-flash" = {}
|
||||
"llama-as-openai/Llama-3.3-70B-Instruct" = {}
|
||||
"llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8" = {}
|
||||
"nim/deepseek-ai/deepseek-v3.2" = {}
|
||||
"nim/qwen/qwen3.5-397b-a17b" = {}
|
||||
"nim/mistralai/mistral-large-3-675b-instruct-2512" = {}
|
||||
"nim/qwen/qwen3-coder-480b-a35b-instruct" = {}
|
||||
"nim/nvidia/llama-3.1-nemotron-ultra-253b-v1" = {}
|
||||
"nim/z-ai/glm5" = {}
|
||||
"llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8" = {}
|
||||
"llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8" = {}
|
||||
"openrouter/stepfun/step-3.5-flash:free" = {}
|
||||
"openrouter/arcee-ai/trinity-large-preview:free" = {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -124,30 +129,26 @@ resource "kubernetes_config_map" "openclaw_config" {
|
|||
models = {
|
||||
mode = "merge"
|
||||
providers = {
|
||||
modal = {
|
||||
baseUrl = "https://api.us-west-2.modal.direct/v1"
|
||||
nim = {
|
||||
baseUrl = "https://integrate.api.nvidia.com/v1"
|
||||
api = "openai-completions"
|
||||
apiKey = var.modal_api_key
|
||||
apiKey = var.nvidia_api_key
|
||||
models = [
|
||||
{ id = "zai-org/GLM-5-FP8", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "deepseek-ai/deepseek-v3.2", name = "DeepSeek V3.2", reasoning = false, input = ["text"], contextWindow = 164000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "qwen/qwen3.5-397b-a17b", name = "Qwen 3.5", reasoning = true, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "mistralai/mistral-large-3-675b-instruct-2512", name = "Mistral Large 3", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "qwen/qwen3-coder-480b-a35b-instruct", name = "Qwen 3 Coder", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "nvidia/llama-3.1-nemotron-ultra-253b-v1", name = "Nemotron Ultra 253B", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "z-ai/glm5", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
]
|
||||
}
|
||||
gemini = {
|
||||
baseUrl = "https://generativelanguage.googleapis.com/v1beta"
|
||||
api = "google-generative-ai"
|
||||
apiKey = var.gemini_api_key
|
||||
models = [
|
||||
{ id = "gemini-2.5-flash", name = "gemini-2.5-flash", reasoning = true, input = ["text", "image"], contextWindow = 1048576, maxTokens = 65536, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
]
|
||||
}
|
||||
ollama = {
|
||||
baseUrl = "http://${var.ollama_host}:11434/v1"
|
||||
openrouter = {
|
||||
baseUrl = "https://openrouter.ai/api/v1"
|
||||
api = "openai-completions"
|
||||
apiKey = "ollama"
|
||||
apiKey = var.openrouter_api_key
|
||||
models = [
|
||||
{ id = "qwen2.5-coder:14b", name = "qwen2.5-coder:14b", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "qwen2.5:14b", name = "qwen2.5:14b", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "deepseek-r1:14b", name = "deepseek-r1:14b", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "stepfun/step-3.5-flash:free", name = "Step 3.5 Flash", reasoning = true, input = ["text"], contextWindow = 256000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "arcee-ai/trinity-large-preview:free", name = "Trinity Large", reasoning = false, input = ["text"], contextWindow = 131000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
]
|
||||
}
|
||||
llama-as-openai = {
|
||||
|
|
@ -155,9 +156,8 @@ resource "kubernetes_config_map" "openclaw_config" {
|
|||
apiKey = var.llama_api_key
|
||||
api = "openai-completions"
|
||||
models = [
|
||||
{ id = "Llama-3.3-70B-Instruct", name = "Llama-3.3-70B-Instruct", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "Llama-4-Scout-17B-16E-Instruct-FP8", name = "Llama-4-Scout-17B-16E-Instruct-FP8", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "Llama-4-Maverick-17B-128E-Instruct-FP8", name = "Llama-4-Maverick-17B-128E-Instruct-FP8", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "Llama-4-Maverick-17B-128E-Instruct-FP8", name = "Llama 4 Maverick", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "Llama-4-Scout-17B-16E-Instruct-FP8", name = "Llama 4 Scout", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
@ -345,10 +345,6 @@ resource "kubernetes_deployment" "openclaw" {
|
|||
name = "GIT_CONFIG_GLOBAL"
|
||||
value = "/home/node/.openclaw/.gitconfig"
|
||||
}
|
||||
env {
|
||||
name = "GEMINI_API_KEY"
|
||||
value = var.gemini_api_key
|
||||
}
|
||||
# Skill secrets - Home Assistant
|
||||
env {
|
||||
name = "HOME_ASSISTANT_URL"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue