From b233aba71037df96602953472a21a5dd13a85696 Mon Sep 17 00:00:00 2001
From: Viktor Barzin <vbarzin@gmail.com>
Date: Fri, 22 May 2026 15:23:17 +0000
Subject: [PATCH] openclaw: switch primary to nim/meta/llama-3.1-70b-instruct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Auth audit on 2026-05-22 — all the broken paths and the one that works:

- openai-codex OAuth: EXPIRED (ChatGPT Plus, ancaelena98@gmail.com)
- secret/openclaw → openai_api_key (sk-svcacct): insufficient_quota
- openrouter_api_key: "Key limit exceeded (total limit)"
- llama_api_key: region-blocked
- anthropic_api_key: sk-ant-oat-… (OAuth access token, not a real
  x-api-key — won't auth via x-api-key header)
- nvidia_api_key (NIM): WORKS. The key was already baked into the
  openclaw.json providers.nim.apiKey from secret/openclaw → nvidia_api_key.

Two NIM models verified end-to-end (called from inside the openclaw pod
with a tool-call schema; both returned proper {tool_calls:[…]} JSON):
- meta/llama-3.1-70b-instruct             — 0.58s, primary
- meta/llama-4-maverick-17b-128e-instruct — 16s, smarter, fallback

Fallback chain: maverick → openai-codex (auto-promotes once re-authed)
→ modelrelay/auto-fastest (last resort, hallucinates instead of
tool-calling, but at least responds).

Models registered in both `agents.defaults.models` (allowlist) and
`models.providers.nim.models` (capability declarations) so the agent
sees them as available models. Startup `models set` updated to re-pin
the new primary after each `doctor --fix` run.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 stacks/openclaw/main.tf | 47 ++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/stacks/openclaw/main.tf b/stacks/openclaw/main.tf
index 4ef5e382..3b573db0 100644
--- a/stacks/openclaw/main.tf
+++ b/stacks/openclaw/main.tf
@@ -132,24 +132,29 @@ resource "kubernetes_config_map" "openclaw_config" {
             mode = "off"
           }
           model = {
-            # ChatGPT Plus OAuth via openai-codex plugin (account:
-            # ancaelena98@gmail.com). gpt-5.4-mini is the only mini
-            # variant the Codex backend accepts for Plus tier;
-            # gpt-5-mini / gpt-5.1-codex-mini return model_not_found
-            # / "not supported with ChatGPT account". Plus rate-card:
-            # 1,200–7,000 local msgs / 5h on gpt-5.4-mini.
-            #
-            # If you see "No API key found for provider openai-codex"
-            # / "OAuth refresh failed" in logs, the OAuth token has
-            # expired. Re-auth:
-            #   kubectl -n openclaw exec -it $(kubectl -n openclaw \
-            #     get pods -l app=openclaw -o jsonpath='{.items[0].metadata.name}') \
-            #     -c openclaw -- node /app/openclaw.mjs models auth login \
-            #     --provider openai-codex
-            # Follow the OAuth URL+code prompt. Tokens persist on the
-            # openclaw-home PVC so it sticks across pod restarts.
-            primary   = "openai-codex/gpt-5.4-mini"
-            fallbacks = ["openai-codex/gpt-5.5", "modelrelay/auto-fastest", "nim/qwen/qwen3-coder-480b-a35b-instruct"]
+            # 2026-05-22: switched primary to nim/meta/llama-3.1-70b-instruct.
+            # Verified end-to-end with tool calls (sub-second responses,
+            # proper tool_calls in API response). Auth audit on this date:
+            #   - openai-codex OAuth: EXPIRED (ancaelena98@gmail.com,
+            #     ChatGPT Plus). Re-auth requires interactive TTY:
+            #       kubectl -n openclaw exec -it $(kubectl -n openclaw \
+            #         get pods -l app=openclaw -o jsonpath='{.items[0].metadata.name}') \
+            #         -c openclaw -- node /app/openclaw.mjs models auth \
+            #         login --provider openai-codex
+            #   - secret/openclaw → openai_api_key (sk-svcacct…):
+            #     insufficient_quota (billing exhausted)
+            #   - openrouter_api_key: "Key limit exceeded"
+            #   - llama_api_key: region-blocked
+            #   - anthropic_api_key: sk-ant-oat-… (OAuth access token,
+            #     NOT a real x-api-key — won't auth)
+            #   - nvidia_api_key: WORKS. nim/meta/llama-3.1-70b-instruct
+            #     and nim/meta/llama-4-maverick-17b-128e-instruct both
+            #     tool-call reliably.
+            # Keep codex as a fallback so it auto-promotes once
+            # re-authed; modelrelay last because it routes to a
+            # small model that hallucinates instead of tool-calling.
+            primary   = "nim/meta/llama-3.1-70b-instruct"
+            fallbacks = ["nim/meta/llama-4-maverick-17b-128e-instruct", "openai-codex/gpt-5.4-mini", "modelrelay/auto-fastest"]
           }
           models = {
             "modelrelay/auto-fastest"                                = {}
@@ -159,6 +164,8 @@ resource "kubernetes_config_map" "openclaw_config" {
             "nim/qwen/qwen3-coder-480b-a35b-instruct"                = {}
             "nim/nvidia/llama-3.1-nemotron-ultra-253b-v1"            = {}
             "nim/z-ai/glm5"                                          = {}
+            "nim/meta/llama-3.1-70b-instruct"                        = {}
+            "nim/meta/llama-4-maverick-17b-128e-instruct"            = {}
             "llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8" = {}
             "llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8"     = {}
             "openrouter/stepfun/step-3.5-flash:free"                 = {}
@@ -244,6 +251,8 @@ resource "kubernetes_config_map" "openclaw_config" {
               { id = "qwen/qwen3-coder-480b-a35b-instruct", name = "Qwen 3 Coder", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
               { id = "nvidia/llama-3.1-nemotron-ultra-253b-v1", name = "Nemotron Ultra 253B", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
               { id = "z-ai/glm5", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
+              { id = "meta/llama-3.1-70b-instruct", name = "Llama 3.1 70B Instruct", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
+              { id = "meta/llama-4-maverick-17b-128e-instruct", name = "Llama 4 Maverick (NIM)", reasoning = false, input = ["text"], contextWindow = 1000000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
             ]
           }
           openrouter = {
@@ -1110,7 +1119,7 @@ resource "kubernetes_deployment" "openclaw" {
             # at /home/node/.openclaw/.ssh (set up by init 5).
             ln -sfn /home/node/.openclaw/.ssh /home/node/.ssh
             node openclaw.mjs doctor --fix 2>/dev/null
-            node openclaw.mjs models set openai-codex/gpt-5.4-mini 2>/dev/null
+            node openclaw.mjs models set nim/meta/llama-3.1-70b-instruct 2>/dev/null
             node openclaw.mjs mcp set ha "{\"url\":\"$HA_SOFIA_MCP_URL\",\"transport\":\"streamable-http\"}" 2>/dev/null
             node openclaw.mjs mcp set context7 '{"command":"npx","args":["-y","@upstash/context7-mcp"]}' 2>/dev/null
             node openclaw.mjs mcp set playwright '{"url":"http://localhost:3000/mcp","transport":"streamable-http"}' 2>/dev/null