[ci skip] openclaw: add modelrelay sidecar as fallback model router
- Deploy modelrelay as sidecar container (auto-routes to fastest free model) - Configured with NVIDIA NIM + OpenRouter API keys - Primary: Mistral Large 3 (NIM), Fallback 1: Nemotron Ultra (NIM), Fallback 2: modelrelay/auto-fastest (80+ free models) - Modelrelay web UI available at pod:7352
This commit is contained in:
parent
207164050c
commit
f031a6bcf6
1 changed files with 52 additions and 1 deletions
|
|
@ -91,9 +91,10 @@ resource "kubernetes_config_map" "openclaw_config" {
|
||||||
bootstrapMaxChars = 30000
|
bootstrapMaxChars = 30000
|
||||||
model = {
|
model = {
|
||||||
primary = "nim/mistralai/mistral-large-3-675b-instruct-2512"
|
primary = "nim/mistralai/mistral-large-3-675b-instruct-2512"
|
||||||
fallbacks = ["nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", "llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8"]
|
fallbacks = ["nim/nvidia/llama-3.1-nemotron-ultra-253b-v1", "modelrelay/auto-fastest"]
|
||||||
}
|
}
|
||||||
models = {
|
models = {
|
||||||
|
"modelrelay/auto-fastest" = {}
|
||||||
"nim/deepseek-ai/deepseek-v3.2" = {}
|
"nim/deepseek-ai/deepseek-v3.2" = {}
|
||||||
"nim/qwen/qwen3.5-397b-a17b" = {}
|
"nim/qwen/qwen3.5-397b-a17b" = {}
|
||||||
"nim/mistralai/mistral-large-3-675b-instruct-2512" = {}
|
"nim/mistralai/mistral-large-3-675b-instruct-2512" = {}
|
||||||
|
|
@ -143,6 +144,14 @@ resource "kubernetes_config_map" "openclaw_config" {
|
||||||
models = {
|
models = {
|
||||||
mode = "merge"
|
mode = "merge"
|
||||||
providers = {
|
providers = {
|
||||||
|
modelrelay = {
|
||||||
|
baseUrl = "http://127.0.0.1:7352/v1"
|
||||||
|
api = "openai-completions"
|
||||||
|
apiKey = "modelrelay"
|
||||||
|
models = [
|
||||||
|
{ id = "auto-fastest", name = "Auto (Fastest)", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
|
||||||
|
]
|
||||||
|
}
|
||||||
nim = {
|
nim = {
|
||||||
baseUrl = "https://integrate.api.nvidia.com/v1"
|
baseUrl = "https://integrate.api.nvidia.com/v1"
|
||||||
api = "openai-completions"
|
api = "openai-completions"
|
||||||
|
|
@ -464,6 +473,48 @@ resource "kubernetes_deployment" "openclaw" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Sidecar: modelrelay — auto-routes to fastest healthy free model
|
||||||
|
container {
|
||||||
|
name = "modelrelay"
|
||||||
|
image = "node:22-alpine"
|
||||||
|
command = ["sh", "-c", <<-EOF
|
||||||
|
if [ ! -f /tools/modelrelay/node_modules/.package-lock.json ]; then
|
||||||
|
mkdir -p /tools/modelrelay
|
||||||
|
cd /tools/modelrelay
|
||||||
|
npm init -y > /dev/null 2>&1
|
||||||
|
npm install modelrelay > /dev/null 2>&1
|
||||||
|
fi
|
||||||
|
cd /tools/modelrelay
|
||||||
|
exec npx modelrelay --port 7352
|
||||||
|
EOF
|
||||||
|
]
|
||||||
|
port {
|
||||||
|
container_port = 7352
|
||||||
|
}
|
||||||
|
env {
|
||||||
|
name = "NVIDIA_API_KEY"
|
||||||
|
value = var.nvidia_api_key
|
||||||
|
}
|
||||||
|
env {
|
||||||
|
name = "OPENROUTER_API_KEY"
|
||||||
|
value = var.openrouter_api_key
|
||||||
|
}
|
||||||
|
volume_mount {
|
||||||
|
name = "tools"
|
||||||
|
mount_path = "/tools"
|
||||||
|
}
|
||||||
|
resources {
|
||||||
|
limits = {
|
||||||
|
cpu = "500m"
|
||||||
|
memory = "256Mi"
|
||||||
|
}
|
||||||
|
requests = {
|
||||||
|
cpu = "25m"
|
||||||
|
memory = "64Mi"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
volume {
|
volume {
|
||||||
name = "tools"
|
name = "tools"
|
||||||
nfs {
|
nfs {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue