[ci skip] openclaw: switch to free agentic models via NVIDIA NIM, OpenRouter, Llama API
- Primary: Mistral Large 3 (675B) on NIM — always warm, excellent tool calling
- Fallback 1: Nemotron Ultra 253B on NIM
- Fallback 2: Llama 4 Maverick on Llama API (different provider for resilience)
- 10 models total across 3 providers, all free
- Removed: Modal (GLM-5), Gemini, Ollama providers
- Added: NVIDIA NIM provider with DeepSeek V3.2, Qwen 3.5, Qwen 3 Coder, GLM-5
- Bumped maxTokens from 8192 to 16384 for agentic output room
This commit is contained in:
parent
99ecba46db
commit
014f6cad5a
1 changed file with 30 additions and 34 deletions
|
|
@ -1,12 +1,11 @@
|
|||
variable "tls_secret_name" { type = string }
|
||||
variable "openclaw_ssh_key" { type = string }
|
||||
variable "openclaw_skill_secrets" { type = map(string) }
|
||||
variable "gemini_api_key" { type = string }
|
||||
variable "llama_api_key" { type = string }
|
||||
variable "brave_api_key" { type = string }
|
||||
variable "modal_api_key" { type = string }
|
||||
variable "openrouter_api_key" { type = string }
|
||||
variable "nvidia_api_key" { type = string }
|
||||
variable "nfs_server" { type = string }
|
||||
variable "ollama_host" { type = string }
|
||||
|
||||
|
||||
resource "kubernetes_namespace" "openclaw" {
|
||||
|
|
@ -87,14 +86,20 @@ resource "kubernetes_config_map" "openclaw_config" {
|
|||
contextTokens = 1000000
|
||||
bootstrapMaxChars = 30000
|
||||
model = {
|
||||
primary = "modal/zai-org/GLM-5-FP8"
|
||||
fallbacks = ["gemini/gemini-2.5-flash", "llama-as-openai/Llama-3.3-70B-Instruct"]
|
||||
primary = "nim/mistralai/mistral-large-3-675b-instruct-2512"
|
||||
fallbacks = ["nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", "llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8"]
|
||||
}
|
||||
models = {
|
||||
"modal/zai-org/GLM-5-FP8" = { streaming = false }
|
||||
"gemini/gemini-2.5-flash" = {}
|
||||
"llama-as-openai/Llama-3.3-70B-Instruct" = {}
|
||||
"llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8" = {}
|
||||
"nim/deepseek-ai/deepseek-v3.2" = {}
|
||||
"nim/qwen/qwen3.5-397b-a17b" = {}
|
||||
"nim/mistralai/mistral-large-3-675b-instruct-2512" = {}
|
||||
"nim/qwen/qwen3-coder-480b-a35b-instruct" = {}
|
||||
"nim/nvidia/llama-3.1-nemotron-ultra-253b-v1" = {}
|
||||
"nim/z-ai/glm5" = {}
|
||||
"llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8" = {}
|
||||
"llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8" = {}
|
||||
"openrouter/stepfun/step-3.5-flash:free" = {}
|
||||
"openrouter/arcee-ai/trinity-large-preview:free" = {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -124,30 +129,26 @@ resource "kubernetes_config_map" "openclaw_config" {
|
|||
models = {
|
||||
mode = "merge"
|
||||
providers = {
|
||||
modal = {
|
||||
baseUrl = "https://api.us-west-2.modal.direct/v1"
|
||||
nim = {
|
||||
baseUrl = "https://integrate.api.nvidia.com/v1"
|
||||
api = "openai-completions"
|
||||
apiKey = var.modal_api_key
|
||||
apiKey = var.nvidia_api_key
|
||||
models = [
|
||||
{ id = "zai-org/GLM-5-FP8", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "deepseek-ai/deepseek-v3.2", name = "DeepSeek V3.2", reasoning = false, input = ["text"], contextWindow = 164000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "qwen/qwen3.5-397b-a17b", name = "Qwen 3.5", reasoning = true, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "mistralai/mistral-large-3-675b-instruct-2512", name = "Mistral Large 3", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "qwen/qwen3-coder-480b-a35b-instruct", name = "Qwen 3 Coder", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "nvidia/llama-3.1-nemotron-ultra-253b-v1", name = "Nemotron Ultra 253B", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "z-ai/glm5", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
]
|
||||
}
|
||||
gemini = {
|
||||
baseUrl = "https://generativelanguage.googleapis.com/v1beta"
|
||||
api = "google-generative-ai"
|
||||
apiKey = var.gemini_api_key
|
||||
models = [
|
||||
{ id = "gemini-2.5-flash", name = "gemini-2.5-flash", reasoning = true, input = ["text", "image"], contextWindow = 1048576, maxTokens = 65536, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
]
|
||||
}
|
||||
ollama = {
|
||||
baseUrl = "http://${var.ollama_host}:11434/v1"
|
||||
openrouter = {
|
||||
baseUrl = "https://openrouter.ai/api/v1"
|
||||
api = "openai-completions"
|
||||
apiKey = "ollama"
|
||||
apiKey = var.openrouter_api_key
|
||||
models = [
|
||||
{ id = "qwen2.5-coder:14b", name = "qwen2.5-coder:14b", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "qwen2.5:14b", name = "qwen2.5:14b", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "deepseek-r1:14b", name = "deepseek-r1:14b", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "stepfun/step-3.5-flash:free", name = "Step 3.5 Flash", reasoning = true, input = ["text"], contextWindow = 256000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "arcee-ai/trinity-large-preview:free", name = "Trinity Large", reasoning = false, input = ["text"], contextWindow = 131000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
]
|
||||
}
|
||||
llama-as-openai = {
|
||||
|
|
@ -155,9 +156,8 @@ resource "kubernetes_config_map" "openclaw_config" {
|
|||
apiKey = var.llama_api_key
|
||||
api = "openai-completions"
|
||||
models = [
|
||||
{ id = "Llama-3.3-70B-Instruct", name = "Llama-3.3-70B-Instruct", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "Llama-4-Scout-17B-16E-Instruct-FP8", name = "Llama-4-Scout-17B-16E-Instruct-FP8", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "Llama-4-Maverick-17B-128E-Instruct-FP8", name = "Llama-4-Maverick-17B-128E-Instruct-FP8", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 8192, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "Llama-4-Maverick-17B-128E-Instruct-FP8", name = "Llama 4 Maverick", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
{ id = "Llama-4-Scout-17B-16E-Instruct-FP8", name = "Llama 4 Scout", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
@ -345,10 +345,6 @@ resource "kubernetes_deployment" "openclaw" {
|
|||
name = "GIT_CONFIG_GLOBAL"
|
||||
value = "/home/node/.openclaw/.gitconfig"
|
||||
}
|
||||
env {
|
||||
name = "GEMINI_API_KEY"
|
||||
value = var.gemini_api_key
|
||||
}
|
||||
# Skill secrets - Home Assistant
|
||||
env {
|
||||
name = "HOME_ASSISTANT_URL"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue