[ci skip] openclaw: switch to free agentic models via NVIDIA NIM, OpenRouter, Llama API

- Primary: Mistral Large 3 (675B) on NIM - always warm, excellent tool calling
- Fallback 1: Nemotron Ultra 253B on NIM
- Fallback 2: Llama 4 Maverick on Llama API (different provider for resilience)
- 10 models total across 3 providers, all free
- Removed: Modal (GLM-5), Gemini, Ollama providers
- Added: NVIDIA NIM provider with DeepSeek V3.2, Qwen 3.5, Qwen 3 Coder, GLM-5
- Bumped maxTokens from 8192 to 16384 for agentic output room
This commit is contained in:
Viktor Barzin 2026-03-01 13:22:47 +00:00
parent 99ecba46db
commit 014f6cad5a

View file

@@ -1,12 +1,11 @@
variable "tls_secret_name" { type = string }
variable "openclaw_ssh_key" { type = string }
variable "openclaw_skill_secrets" { type = map(string) }
variable "gemini_api_key" { type = string }
variable "llama_api_key" { type = string }
variable "brave_api_key" { type = string }
variable "modal_api_key" { type = string }
variable "openrouter_api_key" { type = string }
variable "nvidia_api_key" { type = string }
variable "nfs_server" { type = string }
variable "ollama_host" { type = string }
resource "kubernetes_namespace" "openclaw" {
@@ -87,14 +86,20 @@ resource "kubernetes_config_map" "openclaw_config" {
contextTokens = 1000000
bootstrapMaxChars = 30000
model = {
primary = "modal/zai-org/GLM-5-FP8"
fallbacks = ["gemini/gemini-2.5-flash", "llama-as-openai/Llama-3.3-70B-Instruct"]
primary = "nim/mistralai/mistral-large-3-675b-instruct-2512"
fallbacks = ["nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", "llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8"]
}
models = {
"modal/zai-org/GLM-5-FP8" = { streaming = false }
"gemini/gemini-2.5-flash" = {}
"llama-as-openai/Llama-3.3-70B-Instruct" = {}
"llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8" = {}
"nim/deepseek-ai/deepseek-v3.2" = {}
"nim/qwen/qwen3.5-397b-a17b" = {}
"nim/mistralai/mistral-large-3-675b-instruct-2512" = {}
"nim/qwen/qwen3-coder-480b-a35b-instruct" = {}
"nim/nvidia/llama-3.1-nemotron-ultra-253b-v1" = {}
"nim/z-ai/glm5" = {}
"llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8" = {}
"llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8" = {}
"openrouter/stepfun/step-3.5-flash:free" = {}
"openrouter/arcee-ai/trinity-large-preview:free" = {}
}
}
}
@@ -124,30 +129,26 @@ resource "kubernetes_config_map" "openclaw_config" {
models = {
mode = "merge"
providers = {
modal = {
baseUrl = "https://api.us-west-2.modal.direct/v1"
nim = {
baseUrl = "https://integrate.api.nvidia.com/v1"
api = "openai-completions"
apiKey = var.modal_api_key
apiKey = var.nvidia_api_key
models = [
{ id = "zai-org/GLM-5-FP8", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "deepseek-ai/deepseek-v3.2", name = "DeepSeek V3.2", reasoning = false, input = ["text"], contextWindow = 164000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "qwen/qwen3.5-397b-a17b", name = "Qwen 3.5", reasoning = true, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "mistralai/mistral-large-3-675b-instruct-2512", name = "Mistral Large 3", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "qwen/qwen3-coder-480b-a35b-instruct", name = "Qwen 3 Coder", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "nvidia/llama-3.1-nemotron-ultra-253b-v1", name = "Nemotron Ultra 253B", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "z-ai/glm5", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
gemini = {
baseUrl = "https://generativelanguage.googleapis.com/v1beta"
api = "google-generative-ai"
apiKey = var.gemini_api_key
models = [
{ id = "gemini-2.5-flash", name = "gemini-2.5-flash", reasoning = true, input = ["text", "image"], contextWindow = 1048576, maxTokens = 65536, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
ollama = {
baseUrl = "http://${var.ollama_host}:11434/v1"
openrouter = {
baseUrl = "https://openrouter.ai/api/v1"
api = "openai-completions"
apiKey = "ollama"
apiKey = var.openrouter_api_key
models = [
{ id = "qwen2.5-coder:14b", name = "qwen2.5-coder:14b", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "qwen2.5:14b", name = "qwen2.5:14b", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "deepseek-r1:14b", name = "deepseek-r1:14b", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "stepfun/step-3.5-flash:free", name = "Step 3.5 Flash", reasoning = true, input = ["text"], contextWindow = 256000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "arcee-ai/trinity-large-preview:free", name = "Trinity Large", reasoning = false, input = ["text"], contextWindow = 131000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
llama-as-openai = {
@@ -155,9 +156,8 @@ resource "kubernetes_config_map" "openclaw_config" {
apiKey = var.llama_api_key
api = "openai-completions"
models = [
{ id = "Llama-3.3-70B-Instruct", name = "Llama-3.3-70B-Instruct", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "Llama-4-Scout-17B-16E-Instruct-FP8", name = "Llama-4-Scout-17B-16E-Instruct-FP8", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "Llama-4-Maverick-17B-128E-Instruct-FP8", name = "Llama-4-Maverick-17B-128E-Instruct-FP8", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "Llama-4-Maverick-17B-128E-Instruct-FP8", name = "Llama 4 Maverick", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "Llama-4-Scout-17B-16E-Instruct-FP8", name = "Llama 4 Scout", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
}
@@ -345,10 +345,6 @@ resource "kubernetes_deployment" "openclaw" {
name = "GIT_CONFIG_GLOBAL"
value = "/home/node/.openclaw/.gitconfig"
}
env {
name = "GEMINI_API_KEY"
value = var.gemini_api_key
}
# Skill secrets - Home Assistant
env {
name = "HOME_ASSISTANT_URL"