infra/stacks/openclaw/main.tf
Viktor Barzin 9ad52dfd61 openclaw: SSH + tmux task fallback to devvm
Give the OpenClaw pod two new capabilities:

1. Host-tools bundle. New init container `install-host-tools` extracts
   openssh-client + dnsutils + tmux + jq + ripgrep + fd + vault + yq +
   friends into /tools/host-tools/, with the bookworm-slim libs the
   binaries need. PATH + LD_LIBRARY_PATH on the main container point
   ld.so at the bundle. Idempotent via /tools/host-tools/.installed-v1
   marker; smoke test (ldd-based) fails the init at deploy time if any
   binary has unresolved deps. Bundle is ~558 MB on the existing
   /srv/nfs/openclaw/tools NFS.

2. devvm SSH + async task pattern. New init `setup-ssh-config` writes
   id_rsa/config/known_hosts under /home/node/.openclaw/.ssh; main
   container startup symlinks /home/node/.ssh → there. New
   /usr/local/bin/openclaw-task wrapper on devvm manages long-running
   work as tmux sessions on devvm (sessions and logs survive pod
   restarts — they live on devvm, not in the pod). New init container
   `seed-devvm-memory-note` drops a markdown note teaching the pattern;
   main container startup now runs `openclaw memory index --force` so
   the note is searchable on first boot.

Design + verified E2E flow in
docs/plans/2026-05-22-openclaw-devvm-access-design.md. Persistence test
green: spawned a 50s task from pod A, deleted pod A, new pod B saw the
task finish and read its full log.

Pre-existing keel.sh annotation drift on openclaw/{openlobster,
task_webhook} cleaned up in the same apply.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 10:20:00 +00:00

1752 lines
59 KiB
HCL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

variable "tls_secret_name" {
type = string
sensitive = true
}
variable "nfs_server" { type = string }
data "vault_kv_secret_v2" "secrets" {
mount = "secret"
name = "openclaw"
}
locals {
skill_secrets = jsondecode(data.vault_kv_secret_v2.secrets.data["skill_secrets"])
}
resource "kubernetes_namespace" "openclaw" {
metadata {
name = "openclaw"
labels = {
tier = local.tiers.aux
"resource-governance/custom-limitrange" = "true"
"resource-governance/custom-quota" = "true"
"keel.sh/enrolled" = "true"
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
}
}
module "tls_secret" {
source = "../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.openclaw.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_manifest" "external_secret" {
manifest = {
apiVersion = "external-secrets.io/v1beta1"
kind = "ExternalSecret"
metadata = {
name = "openclaw-secrets"
namespace = "openclaw"
}
spec = {
refreshInterval = "15m"
secretStoreRef = {
name = "vault-kv"
kind = "ClusterSecretStore"
}
target = {
name = "openclaw-secrets"
}
dataFrom = [{
extract = {
key = "openclaw"
}
}]
}
}
depends_on = [kubernetes_namespace.openclaw]
}
resource "kubernetes_service_account" "openclaw" {
metadata {
name = "openclaw"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
}
resource "kubernetes_cluster_role_binding" "openclaw" {
metadata {
name = "openclaw-cluster-admin"
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.openclaw.metadata[0].name
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "cluster-admin"
}
}
resource "kubernetes_secret" "ssh_key" {
metadata {
name = "ssh-key"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"id_rsa" = data.vault_kv_secret_v2.secrets.data["ssh_key"]
}
type = "generic"
}
resource "kubernetes_config_map" "git_crypt_key" {
metadata {
name = "git-crypt-key"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"key" = filebase64("${path.root}/../../.git/git-crypt/keys/default")
}
}
resource "kubernetes_config_map" "openclaw_config" {
metadata {
name = "openclaw-config"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"openclaw.json" = jsonencode({
gateway = {
mode = "local"
bind = "lan"
trustedProxies = ["10.0.0.0/8"]
controlUi = {
dangerouslyDisableDeviceAuth = true
dangerouslyAllowHostHeaderOriginFallback = true
}
}
agents = {
defaults = {
contextTokens = 1000000
bootstrapMaxChars = 30000
workspace = "/workspace"
sandbox = {
mode = "off"
}
model = {
# ChatGPT Plus OAuth via openai-codex plugin (account: ancaelena98@gmail.com).
# gpt-5.4-mini is the only mini variant the Codex backend accepts for Plus tier;
# gpt-5-mini / gpt-5.1-codex-mini return model_not_found / "not supported with
# ChatGPT account". Plus rate-card: 1,2007,000 local msgs / 5h on gpt-5.4-mini.
primary = "openai-codex/gpt-5.4-mini"
fallbacks = ["openai-codex/gpt-5.5", "nim/qwen/qwen3-coder-480b-a35b-instruct", "modelrelay/auto-fastest"]
}
models = {
"modelrelay/auto-fastest" = {}
"nim/deepseek-ai/deepseek-v3.2" = {}
"nim/qwen/qwen3.5-397b-a17b" = {}
"nim/mistralai/mistral-large-3-675b-instruct-2512" = {}
"nim/qwen/qwen3-coder-480b-a35b-instruct" = {}
"nim/nvidia/llama-3.1-nemotron-ultra-253b-v1" = {}
"nim/z-ai/glm5" = {}
"llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8" = {}
"llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8" = {}
"openrouter/stepfun/step-3.5-flash:free" = {}
"openrouter/arcee-ai/trinity-large-preview:free" = {}
"openai-codex/gpt-5.4-mini" = {}
"openai-codex/gpt-5.5" = {}
}
}
}
tools = {
profile = "full"
deny = []
elevated = {
enabled = true
}
exec = {
host = "gateway"
security = "full"
ask = "off"
pathPrepend = ["/tools", "/workspace"]
}
web = {
search = {
enabled = true
provider = "brave"
apiKey = data.vault_kv_secret_v2.secrets.data["brave_api_key"]
maxResults = 5
}
fetch = {
enabled = true
maxChars = 50000
timeoutSeconds = 30
}
}
}
plugins = {
allow = ["memory-core", "recruiter-api"]
slots = { memory = "memory-core" }
load = {
# /app/extensions is the legacy bundled-plugins path; OpenClaw
# already loads bundled plugins natively (doctor warning).
paths = ["/home/node/.openclaw/extensions"]
}
}
# Note: mcp.servers is configured via `openclaw mcp set` in the main
# container startup command (see below) rather than in this ConfigMap.
# OpenClaw's `doctor --fix` (which runs on every pod start) strips
# bulk-loaded mcp blocks from openclaw.json, but preserves CLI-set
# entries. The CLI is the canonical writer.
commands = {
native = true
nativeSkills = true
}
channels = {
telegram = {
enabled = true
botToken = data.vault_kv_secret_v2.secrets.data["telegram_bot_token"]
dmPolicy = "allowlist"
allowFrom = ["tg:8281953845"]
groupPolicy = "allowlist"
streamMode = "partial"
}
}
models = {
mode = "merge"
providers = {
modelrelay = {
baseUrl = "http://127.0.0.1:7352/v1"
api = "openai-completions"
apiKey = "modelrelay"
models = [
{ id = "auto-fastest", name = "Auto (Fastest)", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
nim = {
baseUrl = "https://integrate.api.nvidia.com/v1"
api = "openai-completions"
apiKey = data.vault_kv_secret_v2.secrets.data["nvidia_api_key"]
models = [
{ id = "deepseek-ai/deepseek-v3.2", name = "DeepSeek V3.2", reasoning = false, input = ["text"], contextWindow = 164000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "qwen/qwen3.5-397b-a17b", name = "Qwen 3.5", reasoning = true, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "mistralai/mistral-large-3-675b-instruct-2512", name = "Mistral Large 3", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "qwen/qwen3-coder-480b-a35b-instruct", name = "Qwen 3 Coder", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "nvidia/llama-3.1-nemotron-ultra-253b-v1", name = "Nemotron Ultra 253B", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "z-ai/glm5", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
openrouter = {
baseUrl = "https://openrouter.ai/api/v1"
api = "openai-completions"
apiKey = data.vault_kv_secret_v2.secrets.data["openrouter_api_key"]
models = [
{ id = "stepfun/step-3.5-flash:free", name = "Step 3.5 Flash", reasoning = true, input = ["text"], contextWindow = 256000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "arcee-ai/trinity-large-preview:free", name = "Trinity Large", reasoning = false, input = ["text"], contextWindow = 131000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
llama-as-openai = {
baseUrl = "https://api.llama.com/compat/v1"
apiKey = data.vault_kv_secret_v2.secrets.data["llama_api_key"]
api = "openai-completions"
models = [
{ id = "Llama-4-Maverick-17B-128E-Instruct-FP8", name = "Llama 4 Maverick", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "Llama-4-Scout-17B-16E-Instruct-FP8", name = "Llama 4 Scout", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
}
}
wizard = {
lastRunAt = "2026-03-01T15:11:54.176Z"
lastRunVersion = "2026.2.9"
lastRunCommand = "configure"
lastRunMode = "local"
}
})
}
}
resource "random_password" "gateway_token" {
length = 32
special = false
}
# Prometheus exporter script — read by the openclaw-exporter sidecar.
# Stdlib-only Python so no pip install at startup. Reads sessions JSONL +
# auth-profiles.json from the NFS-backed openclaw home volume (mounted ro).
resource "kubernetes_config_map" "openclaw_exporter" {
metadata {
name = "openclaw-exporter"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"exporter.py" = file("${path.module}/files/exporter.py")
}
}
module "nfs_tools_host" {
source = "../../modules/kubernetes/nfs_volume"
name = "openclaw-tools-host"
namespace = kubernetes_namespace.openclaw.metadata[0].name
nfs_server = "192.168.1.127"
nfs_path = "/srv/nfs/openclaw/tools"
}
resource "kubernetes_persistent_volume_claim" "home_proxmox" {
wait_until_bound = false
metadata {
name = "openclaw-home-proxmox"
namespace = kubernetes_namespace.openclaw.metadata[0].name
annotations = {
"resize.topolvm.io/threshold" = "10%"
"resize.topolvm.io/increase" = "100%"
"resize.topolvm.io/storage_limit" = "5Gi"
}
}
spec {
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm"
resources {
requests = {
storage = "1Gi"
}
}
}
lifecycle {
# The autoresizer expands requests.storage up to storage_limit and
# PVCs can't shrink. Without this, every TF apply tries to revert
# to the spec value, K8s rejects the shrink, and the PVC ends up
# in Terminating-but-in-use limbo.
ignore_changes = [spec[0].resources[0].requests]
}
}
module "nfs_workspace_host" {
source = "../../modules/kubernetes/nfs_volume"
name = "openclaw-workspace-host"
namespace = kubernetes_namespace.openclaw.metadata[0].name
nfs_server = "192.168.1.127"
nfs_path = "/srv/nfs/openclaw/workspace"
}
resource "kubernetes_persistent_volume_claim" "data_proxmox" {
wait_until_bound = false
metadata {
name = "openclaw-data-proxmox"
namespace = kubernetes_namespace.openclaw.metadata[0].name
annotations = {
"resize.topolvm.io/threshold" = "10%"
"resize.topolvm.io/increase" = "100%"
"resize.topolvm.io/storage_limit" = "5Gi"
}
}
spec {
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm"
resources {
requests = {
storage = "1Gi"
}
}
}
lifecycle {
# The autoresizer expands requests.storage up to storage_limit and
# PVCs can't shrink. Without this, every TF apply tries to revert
# to the spec value, K8s rejects the shrink, and the PVC ends up
# in Terminating-but-in-use limbo.
ignore_changes = [spec[0].resources[0].requests]
}
}
## cc-config NFS volume removed — replaced by dotfiles repo clone in init container
## See init_container "install-dotfiles" in the deployment
resource "kubernetes_deployment" "openclaw" {
metadata {
name = "openclaw"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "openclaw"
tier = local.tiers.aux
}
}
spec {
strategy {
type = "Recreate"
}
replicas = 1
selector {
match_labels = {
app = "openclaw"
}
}
template {
metadata {
labels = {
app = "openclaw"
}
annotations = {
"reloader.stakater.com/search" = "true"
# Prometheus auto-discovers pods with these annotations.
# Scraped by the openclaw-exporter sidecar — exposes /metrics on :9099.
"prometheus.io/scrape" = "true"
"prometheus.io/port" = "9099"
"prometheus.io/path" = "/metrics"
}
}
spec {
service_account_name = kubernetes_service_account.openclaw.metadata[0].name
# Init 0: fix /workspace ownership so node user can write
init_container {
name = "fix-workspace-perms"
image = "busybox:1.37"
command = ["sh", "-c", "chown 1000:1000 /workspace"]
volume_mount {
name = "workspace"
mount_path = "/workspace"
}
}
# Init 1: copy openclaw.json from ConfigMap into writable NFS home
init_container {
name = "copy-config"
image = "busybox:1.37"
command = ["sh", "-c", "cp /config/openclaw.json /home/node/.openclaw/openclaw.json && chown 1000:1000 /home/node/.openclaw/openclaw.json"]
volume_mount {
name = "openclaw-config"
mount_path = "/config"
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
}
# Init 1b: regenerate kubeconfig pointing at the projected SA tokenFile
# so kubectl always reads the fresh, kubelet-rotated token. Without
# this the previously-baked kubeconfig retains a SA token bound to a
# long-dead pod and kubectl returns "must be logged in to the server".
init_container {
name = "setup-kubeconfig"
image = "busybox:1.37"
command = ["sh", "-c", <<-EOT
cat > /home/node/.openclaw/kubeconfig <<'KUBECONFIG_EOF'
apiVersion: v1
kind: Config
clusters:
- cluster:
certificate-authority: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
server: https://kubernetes.default.svc
name: in-cluster
contexts:
- context:
cluster: in-cluster
user: openclaw
namespace: openclaw
name: in-cluster
current-context: in-cluster
users:
- name: openclaw
user:
tokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
KUBECONFIG_EOF
chown 1000:1000 /home/node/.openclaw/kubeconfig
chmod 0644 /home/node/.openclaw/kubeconfig
EOT
]
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
}
# Init 2 removed: install-dotfiles init container was cloning dotfiles
# repo via git on every pod start, causing 200+ small NFS writes.
# Dotfiles already exist on NFS at /home/node/.openclaw/dotfiles from
# a previous clone. To update, run git pull manually or via CronJob.
# Init 3: install the recruiter-api OpenClaw plugin from the
# recruiter-responder image into NFS extensions/. Plugin lifecycle
# is coupled to the recruiter-responder image tag — bumping that
# tag re-installs the plugin on next openclaw pod restart.
init_container {
name = "install-recruiter-plugin"
image = "forgejo.viktorbarzin.me/viktor/recruiter-responder:latest"
command = ["sh", "-c", <<-EOT
set -eu
mkdir -p /home/node/.openclaw/extensions/recruiter-api
cp -r /app/openclaw-plugin/. /home/node/.openclaw/extensions/recruiter-api/
chown -R 1000:1000 /home/node/.openclaw/extensions/recruiter-api
echo "recruiter-api plugin installed at /home/node/.openclaw/extensions/recruiter-api"
ls -la /home/node/.openclaw/extensions/recruiter-api
EOT
]
# /home/node/.openclaw is uid 1000 on NFS; recruiter-responder image
# otherwise drops to uid 10001 which can't write or chown. Run as
# root so mkdir + chown succeed.
security_context {
run_as_user = 0
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
resources {
requests = { cpu = "50m", memory = "64Mi" }
limits = { memory = "128Mi" }
}
}
# Init 4: install host-tools bundle (ssh, vault, jq, ripgrep, tmux, …)
# into /tools/host-tools/ so the in-pod agent reaches CLI parity
# with the dev VM. Upstream OpenClaw image is minimal Debian
# bookworm running as uid 1000 — can't apt-install at runtime.
# Idempotent via marker file; bump suffix to force reinstall.
# See docs/plans/2026-05-22-openclaw-devvm-access-design.md.
init_container {
name = "install-host-tools"
image = "debian:bookworm-slim"
command = ["bash", "-c", <<-EOT
set -euo pipefail
DEST=/tools/host-tools
MARKER="$DEST/.installed-v1"
if [ -f "$MARKER" ]; then
echo "host-tools v1 already installed (skipping)"
exit 0
fi
echo "installing host-tools v1 ..."
rm -rf "$DEST"
mkdir -p "$DEST/root" "$DEST/bin"
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
# debian:bookworm-slim doesn't ship wget/unzip; install
# transiently into this init container's filesystem so we
# can download the static binaries below.
apt-get install -y --no-install-recommends wget unzip ca-certificates
# NOTE: we deliberately do NOT pass --no-install-recommends to
# the download step. ssh links against libgssapi-krb5-2 which
# is a hard Depends but its transitive deps (libkrb5-3 etc.)
# need to come along too. The bundle is a self-contained
# /usr-like tree that the openclaw container can use via
# LD_LIBRARY_PATH, so missing deps = broken binaries.
APT_PKGS="openssh-client dnsutils iputils-ping wget gnupg jq ripgrep fd-find ncdu htop strace tcpdump tmux unzip ca-certificates"
apt-get install -y --download-only $APT_PKGS
for d in /var/cache/apt/archives/*.deb; do
dpkg-deb -x "$d" "$DEST/root/"
done
VAULT_VER=1.18.3
YQ_VER=v4.44.3
wget -qO /tmp/vault.zip \
"https://releases.hashicorp.com/vault/$${VAULT_VER}/vault_$${VAULT_VER}_linux_amd64.zip"
unzip -o /tmp/vault.zip vault -d "$DEST/bin/"
chmod +x "$DEST/bin/vault"
wget -qO "$DEST/bin/yq" \
"https://github.com/mikefarah/yq/releases/download/$${YQ_VER}/yq_linux_amd64"
chmod +x "$DEST/bin/yq"
# Smoke test fail init if any bundled binary has unresolved
# shared-lib deps, so glibc / shared-lib drift surfaces at
# deploy time. We don't run --version because flag support
# varies (older scp returns non-zero, ping/nslookup use weird
# conventions). ldd is the reliable signal: if any "not
# found" appears, the binary won't load when called.
# LD_LIBRARY_PATH points ld.so at the bundled libs (the
# openclaw main container sets the same env).
export PATH="$DEST/root/usr/bin:$DEST/root/usr/sbin:$DEST/root/bin:$DEST/root/sbin:$DEST/bin:$PATH"
export LD_LIBRARY_PATH="$DEST/root/usr/lib/x86_64-linux-gnu:$DEST/root/lib/x86_64-linux-gnu"
for t in ssh scp ssh-keyscan dig host nslookup ping wget gpg jq rg fdfind tmux vault yq; do
bin=$(command -v "$t" 2>/dev/null) || { echo "FAIL: $t not on PATH"; exit 1; }
if ldd "$bin" 2>&1 | grep -q "not found"; then
echo "FAIL: $t has unresolved shared libs:"
ldd "$bin"
exit 1
fi
echo "OK: $t"
done
chown -R 1000:1000 "$DEST"
touch "$MARKER"
echo "host-tools v1 install complete ($(du -sh "$DEST" | cut -f1))"
EOT
]
volume_mount {
name = "tools"
mount_path = "/tools"
}
resources {
requests = { cpu = "100m", memory = "256Mi" }
limits = { memory = "512Mi" }
}
}
# Init 5: write /home/node/.openclaw/.ssh/{id_rsa,config,known_hosts}
# so the agent can `ssh devvm` without device-trust prompts. The
# main container symlinks /home/node/.ssh here at startup so
# the ssh client picks it up via $HOME/.ssh. Installs
# openssh-client transiently into this init container so
# ssh-keyscan works without LD_LIBRARY_PATH gymnastics.
init_container {
name = "setup-ssh-config"
image = "debian:bookworm-slim"
command = ["bash", "-c", <<-EOT
set -euo pipefail
SSH=/home/node/.openclaw/.ssh
MARKER="$SSH/.configured-v1"
if [ -f "$MARKER" ]; then
echo "ssh-config v1 already set up (skipping)"
exit 0
fi
echo "installing openssh-client for ssh-keyscan ..."
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y --no-install-recommends openssh-client >/dev/null
echo "configuring ssh ..."
mkdir -p "$SSH"
# Copy the secret-mounted private key into ~/.ssh with 0600
# the secret's tmpfs mount has wider perms (1777 + symlinks)
# that openssh refuses.
cp /ssh/id_rsa "$SSH/id_rsa"
chmod 0600 "$SSH/id_rsa"
cat > "$SSH/config" <<'SSH_EOF'
Host devvm
HostName 10.0.10.10
User wizard
IdentityFile ~/.ssh/id_rsa
UserKnownHostsFile ~/.ssh/known_hosts
StrictHostKeyChecking yes
SSH_EOF
chmod 0600 "$SSH/config"
ssh-keyscan -H 10.0.10.10 > "$SSH/known_hosts" 2>/tmp/keyscan.err
if [ ! -s "$SSH/known_hosts" ]; then
echo "ssh-keyscan produced empty known_hosts; stderr:"
cat /tmp/keyscan.err
exit 1
fi
chmod 0644 "$SSH/known_hosts"
chown -R 1000:1000 "$SSH"
touch "$MARKER"
echo "ssh-config v1 set up"
EOT
]
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
volume_mount {
name = "ssh-key"
mount_path = "/ssh"
}
resources {
requests = { cpu = "50m", memory = "64Mi" }
limits = { memory = "256Mi" }
}
}
# Init 6: seed the devvm-fallback memory note into
# /workspace/memory/projects/openclaw-runtime/. The note teaches
# openclaw the SSH+tmux pattern. The main container's startup
# runs `memory index --force` so it's searchable immediately;
# the daily memory-sync CronJob also keeps it indexed afterward.
# Always rewrites the configmap-baked note is canonical.
init_container {
name = "seed-devvm-memory-note"
image = "busybox:1.37"
command = ["sh", "-c", <<-EOT
set -eu
DIR=/workspace/memory/projects/openclaw-runtime
mkdir -p "$DIR"
cat > "$DIR/devvm-fallback.md" <<'NOTE_EOF'
# Using devvm as a fallback
When in-pod tools/permissions block you, SSH to devvm and use it.
Devvm runs as wizard with full home-lab access (Vault, kubectl,
git repos, Cloudflare, etc.) and Claude Code v2+ is installed.
## One-shot lookup
ssh devvm 'vault kv get -field=brave_api_key secret/openclaw'
ssh devvm 'claude -p "investigate why frigate is restarting"'
## Long-running async work USE THIS for anything > ~2 min
Spawn in a tmux session on devvm. Sessions and logs survive
OpenClaw pod restarts (they live on devvm, not in this pod).
# spawn
ssh devvm openclaw-task new my-task "claude -p --dangerously-skip-permissions 'do the thing'"
# poll progress (last 1000 lines of pane)
ssh devvm openclaw-task capture my-task
# interactive claude (send follow-up prompts)
ssh devvm openclaw-task claude my-task "initial prompt"
ssh devvm openclaw-task send my-task "follow-up prompt" Enter
# housekeeping
ssh devvm openclaw-task list
ssh devvm openclaw-task status my-task
ssh devvm openclaw-task kill my-task
Logs persist at ~/openclaw-tasks/<id>.log on devvm even after a
session is killed. Use `ssh devvm openclaw-task log <id>` to
retrieve them.
NOTE_EOF
chown -R 1000:1000 "$DIR"
echo "memory note seeded at $DIR/devvm-fallback.md"
EOT
]
volume_mount {
name = "workspace"
mount_path = "/workspace"
}
resources {
requests = { cpu = "10m", memory = "32Mi" }
limits = { memory = "32Mi" }
}
}
# Main container: OpenClaw
container {
name = "openclaw"
image = "ghcr.io/openclaw/openclaw:2026.5.4"
# Startup sequence:
# 1. doctor --fix repair sessions/state (also resets some config)
# 2. models set pin gpt-5.4-mini (doctor auto-promotes to gpt-5-pro otherwise)
# 3. mcp set <name> register MCP servers via the CLI (the
# ConfigMap-baked mcp.servers block gets
# stripped by doctor --fix, but CLI-written
# entries persist). Values: ha URL from
# $HA_SOFIA_MCP_URL env (Vault-sourced),
# others hard-coded.
# 4. gateway exec into the gateway process
command = ["sh", "-c", <<-EOC
# Symlink /home/node/.ssh persistent .ssh so the ssh client
# finds id_rsa/config/known_hosts via $HOME/.ssh. HOME is
# /home/node (image overlay), .ssh files live on the PVC
# at /home/node/.openclaw/.ssh (set up by init 5).
ln -sfn /home/node/.openclaw/.ssh /home/node/.ssh
node openclaw.mjs doctor --fix 2>/dev/null
node openclaw.mjs models set openai-codex/gpt-5.4-mini 2>/dev/null
node openclaw.mjs mcp set ha "{\"url\":\"$HA_SOFIA_MCP_URL\",\"transport\":\"streamable-http\"}" 2>/dev/null
node openclaw.mjs mcp set context7 '{"command":"npx","args":["-y","@upstash/context7-mcp"]}' 2>/dev/null
node openclaw.mjs mcp set playwright '{"url":"http://localhost:3000/mcp","transport":"streamable-http"}' 2>/dev/null
# doctor --fix overwrites plugins.allow with its bundled-plugins
# list. Re-add our third-party plugin to the allow list via
# `config patch`, then enable it. (Same pattern as mcp set above.)
echo '{"plugins":{"allow":["memory-core","recruiter-api","telegram","openrouter","brave","openai","codex"]}}' \
| node openclaw.mjs config patch --stdin 2>/dev/null || true
node openclaw.mjs plugins enable recruiter-api 2>/dev/null || true
# Reindex memory-core so the seeded devvm-fallback note (and
# anything else dropped under /workspace/memory/) is searchable
# on first boot; daily memory-sync CronJob also keeps it indexed.
node openclaw.mjs memory index --force 2>/dev/null || true
exec node openclaw.mjs gateway --allow-unconfigured --bind lan
EOC
]
port {
container_port = 18789
}
readiness_probe {
tcp_socket {
port = 18789
}
initial_delay_seconds = 30
period_seconds = 10
}
env {
name = "NODE_OPTIONS"
value = "--max-old-space-size=1536"
}
env {
name = "OPENCLAW_GATEWAY_TOKEN"
value = random_password.gateway_token.result
}
env {
name = "PATH"
# Host-tools bundle (installed by init 4: install-host-tools)
# comes first so ssh/scp/dig/vault/jq/etc. resolve to the
# extracted Debian binaries + the static-binary downloads.
# /bin + /sbin are needed because iputils-ping installs ping
# under /bin (not /usr/bin) on Debian.
value = "/tools/host-tools/root/usr/bin:/tools/host-tools/root/usr/sbin:/tools/host-tools/root/bin:/tools/host-tools/root/sbin:/tools/host-tools/bin:/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
}
env {
# Point ld.so at the bundled libs so the host-tools binaries
# find their shared-lib deps (libgssapi_krb5, libkrb5, etc.).
# Both base images are bookworm so the libs match the
# openclaw image's libc/libssl no ABI conflicts expected.
name = "LD_LIBRARY_PATH"
value = "/tools/host-tools/root/usr/lib/x86_64-linux-gnu:/tools/host-tools/root/lib/x86_64-linux-gnu"
}
env {
name = "TF_VAR_prod"
value = "true"
}
env {
name = "KUBECONFIG"
value = "/home/node/.openclaw/kubeconfig"
}
env {
name = "GIT_CONFIG_GLOBAL"
value = "/home/node/.openclaw/.gitconfig"
}
# Skill secrets - Home Assistant
env {
name = "HOME_ASSISTANT_URL"
value = "https://ha-london.viktorbarzin.me"
}
env {
name = "HOME_ASSISTANT_TOKEN"
value = local.skill_secrets["home_assistant_token"]
}
env {
name = "HOME_ASSISTANT_SOFIA_URL"
value = "https://ha-sofia.viktorbarzin.me"
}
env {
name = "HOME_ASSISTANT_SOFIA_TOKEN"
value = local.skill_secrets["home_assistant_sofia_token"]
}
# MCP URL for ha-mcp add-on on ha-sofia (secret-path auth).
# Consumed in the startup command by `openclaw mcp set ha ...`.
env {
name = "HA_SOFIA_MCP_URL"
value = data.vault_kv_secret_v2.secrets.data["ha_sofia_mcp_url"]
}
# Skill secrets - Uptime Kuma
env {
name = "UPTIME_KUMA_PASSWORD"
value = local.skill_secrets["uptime_kuma_password"]
}
# Memory API
env {
name = "MEMORY_API_URL"
value = "http://claude-memory.claude-memory.svc.cluster.local"
}
env {
name = "MEMORY_API_KEY"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "claude_memory_api_key"
}
}
}
# Recruiter Responder API consumed by the recruiter-api plugin
# (mounted into /home/node/.openclaw/extensions/recruiter-api/ via
# the install-recruiter-plugin init container below).
env {
name = "RECRUITER_RESPONDER_URL"
value = "http://recruiter-responder.recruiter-responder.svc.cluster.local:8080"
}
env {
name = "RECRUITER_RESPONDER_TOKEN"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "recruiter_responder_bearer_token"
optional = true
}
}
}
# Telegram chat ID for the recruiter-api plugin's announcement loop.
env {
name = "VIKTOR_CHAT_ID"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "viktor_chat_id"
optional = true
}
}
}
# Python packages path for skills
env {
name = "PYTHONPATH"
value = "/tools/python-libs"
}
volume_mount {
name = "tools"
mount_path = "/tools"
}
volume_mount {
name = "workspace"
mount_path = "/workspace"
}
volume_mount {
name = "data"
mount_path = "/data"
}
volume_mount {
name = "ssh-key"
mount_path = "/ssh"
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
resources {
limits = {
memory = "2Gi"
}
requests = {
cpu = "100m"
memory = "2Gi"
}
}
}
# Sidecar: playwright-mcp headless browser for agents
container {
name = "playwright-mcp"
image = "docker.io/viktorbarzin/playwright-mcp:v1"
args = ["--headless", "--browser", "chromium", "--no-sandbox", "--port", "3000", "--host", "0.0.0.0"]
port {
container_port = 3000
}
resources {
requests = {
cpu = "50m"
memory = "256Mi"
}
limits = {
memory = "512Mi"
}
}
}
# Sidecar: openclaw-exporter Prometheus exporter for Codex/OAuth usage.
# Reads sessions JSONL files + auth-profiles.json, exposes /metrics on :9099.
# Stdlib-only Python; no pip install at startup.
container {
name = "openclaw-exporter"
image = "docker.io/library/python:3.12-slim"
command = ["python3", "/scripts/exporter.py"]
port {
container_port = 9099
name = "metrics"
}
env {
name = "OPENCLAW_HOME"
value = "/home/node/.openclaw"
}
env {
name = "METRICS_PORT"
value = "9099"
}
volume_mount {
name = "openclaw-exporter-script"
mount_path = "/scripts"
read_only = true
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
read_only = true
}
readiness_probe {
http_get {
path = "/healthz"
port = 9099
}
initial_delay_seconds = 5
period_seconds = 30
}
resources {
requests = {
cpu = "10m"
memory = "64Mi"
}
limits = {
memory = "128Mi"
}
}
}
# Sidecar: modelrelay auto-routes to fastest healthy free model
container {
name = "modelrelay"
image = "docker.io/library/node:22-alpine"
command = ["sh", "-c", <<-EOF
if [ ! -f /tools/modelrelay/node_modules/.package-lock.json ]; then
mkdir -p /tools/modelrelay
cd /tools/modelrelay
npm init -y > /dev/null 2>&1
npm install modelrelay > /dev/null 2>&1
fi
cd /tools/modelrelay
exec npx modelrelay --port 7352
EOF
]
port {
container_port = 7352
}
env {
name = "NVIDIA_API_KEY"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "nvidia_api_key"
}
}
}
env {
name = "OPENROUTER_API_KEY"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "openrouter_api_key"
}
}
}
volume_mount {
name = "tools"
mount_path = "/tools"
}
resources {
limits = {
memory = "256Mi"
}
requests = {
cpu = "25m"
memory = "128Mi"
}
}
}
volume {
name = "tools"
persistent_volume_claim {
claim_name = module.nfs_tools_host.claim_name
}
}
volume {
name = "openclaw-home"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.home_proxmox.metadata[0].name
}
}
volume {
name = "workspace"
persistent_volume_claim {
claim_name = module.nfs_workspace_host.claim_name
}
}
volume {
name = "data"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.data_proxmox.metadata[0].name
}
}
volume {
name = "ssh-key"
secret {
secret_name = kubernetes_secret.ssh_key.metadata[0].name
default_mode = "0600"
}
}
volume {
name = "openclaw-config"
config_map {
name = kubernetes_config_map.openclaw_config.metadata[0].name
}
}
volume {
name = "openclaw-exporter-script"
config_map {
name = kubernetes_config_map.openclaw_exporter.metadata[0].name
default_mode = "0555"
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].template[0].spec[0].dns_config]
}
}
resource "kubernetes_service" "openclaw" {
metadata {
name = "openclaw"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "openclaw"
}
}
spec {
selector = {
app = "openclaw"
}
port {
port = 80
target_port = 18789
}
}
}
module "ingress" {
source = "../../modules/kubernetes/ingress_factory"
dns_type = "non-proxied"
namespace = kubernetes_namespace.openclaw.metadata[0].name
name = "openclaw"
tls_secret_name = var.tls_secret_name
port = 80
auth = "required"
extra_annotations = {
"gethomepage.dev/enabled" = "true"
"gethomepage.dev/name" = "OpenClaw"
"gethomepage.dev/description" = "AI assistant"
"gethomepage.dev/icon" = "openai.png"
"gethomepage.dev/group" = "AI & Data"
"gethomepage.dev/pod-selector" = ""
}
}
# --- Webhook receiver: triggers task-processor Job on Forgejo issue events ---
resource "kubernetes_config_map" "task_webhook" {
metadata {
name = "task-webhook"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"server.py" = <<-PYEOF
from http.server import HTTPServer, BaseHTTPRequestHandler
import subprocess, time, json, os
BOT_USER = os.environ.get('FORGEJO_BOT_USER', 'viktor')
class Handler(BaseHTTPRequestHandler):
def do_POST(self):
try:
body = self.rfile.read(int(self.headers.get('Content-Length', 0)))
data = json.loads(body)
action = data.get('action', '')
# Trigger on: new issue, reopened issue, or new comment
trigger = False
if action in ('opened', 'reopened'):
issue = data.get('issue', {})
print(f"Issue #{issue.get('number','?')} {action}: {issue.get('title','?')}")
trigger = True
elif action == 'created' and 'comment' in data:
comment = data.get('comment', {})
commenter = comment.get('user', {}).get('login', '')
# Skip comments from the bot itself to avoid loops
if commenter != BOT_USER:
issue = data.get('issue', {})
print(f"Comment on #{issue.get('number','?')} by {commenter}")
trigger = True
else:
print(f"Skipping own comment on #{data.get('issue',{}).get('number','?')}")
if trigger:
job_name = f"task-processor-{int(time.time())}"
subprocess.run([
'kubectl', 'create', 'job', job_name,
'--from=cronjob/task-processor',
'-n', 'openclaw'
], check=True)
self.send_response(200)
self.end_headers()
self.wfile.write(b'{"ok":true}')
else:
self.send_response(200)
self.end_headers()
self.wfile.write(b'{"ok":true,"skipped":true}')
except Exception as e:
print(f"Error: {e}")
self.send_response(500)
self.end_headers()
self.wfile.write(f'{{"error":"{e}"}}'.encode())
def do_GET(self):
self.send_response(200)
self.end_headers()
self.wfile.write(b'{"status":"ok"}')
def log_message(self, fmt, *args):
print(f"[webhook] {args[0]} {args[1]} {args[2]}")
print("Task webhook receiver listening on :8080")
HTTPServer(('', 8080), Handler).serve_forever()
PYEOF
}
}
resource "kubernetes_service_account" "task_webhook" {
metadata {
name = "task-webhook"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
}
resource "kubernetes_role" "task_webhook" {
metadata {
name = "task-webhook-job-creator"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
rule {
api_groups = ["batch"]
resources = ["jobs", "cronjobs"]
verbs = ["get", "list", "create"]
}
}
resource "kubernetes_role_binding" "task_webhook" {
metadata {
name = "task-webhook-job-creator"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.task_webhook.metadata[0].name
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = kubernetes_role.task_webhook.metadata[0].name
}
}
resource "kubernetes_deployment" "task_webhook" {
metadata {
name = "task-webhook"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "task-webhook"
tier = local.tiers.aux
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "task-webhook"
}
}
template {
metadata {
labels = {
app = "task-webhook"
}
}
spec {
service_account_name = kubernetes_service_account.task_webhook.metadata[0].name
container {
name = "webhook"
image = "python:3-alpine"
command = ["sh", "-c", "apk add --no-cache curl > /dev/null 2>&1 && curl -sfL https://dl.k8s.io/release/v1.34.2/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl && exec python3 -u /app/server.py"]
port {
container_port = 8080
}
volume_mount {
name = "app"
mount_path = "/app"
}
resources {
requests = {
cpu = "5m"
memory = "64Mi"
}
limits = {
memory = "64Mi"
}
}
}
volume {
name = "app"
config_map {
name = kubernetes_config_map.task_webhook.metadata[0].name
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].template[0].spec[0].dns_config]
}
}
resource "kubernetes_service" "task_webhook" {
metadata {
name = "task-webhook"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "task-webhook"
}
}
spec {
selector = {
app = "task-webhook"
}
port {
port = 80
target_port = 8080
}
}
}
module "task_webhook_ingress" {
source = "../../modules/kubernetes/ingress_factory"
auth = "required"
namespace = kubernetes_namespace.openclaw.metadata[0].name
name = "task-webhook"
tls_secret_name = var.tls_secret_name
host = "task-webhook"
port = 80
external_monitor = false
}
# --- Shared ServiceAccount: grants pod-exec into the openclaw pod ---
# Used by the task_processor CronJob (below). Previously also used by the
# cluster_healthcheck CronJob, which has been decommissioned the local
# `scripts/cluster_healthcheck.sh` is now the single authoritative runner.
resource "kubernetes_service_account" "healthcheck" {
metadata {
name = "cluster-healthcheck"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
}
resource "kubernetes_role" "healthcheck_exec" {
metadata {
name = "healthcheck-pod-exec"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
rule {
api_groups = [""]
resources = ["pods"]
verbs = ["get", "list"]
}
rule {
api_groups = [""]
resources = ["pods/exec"]
verbs = ["create"]
}
}
resource "kubernetes_role_binding" "healthcheck_exec" {
metadata {
name = "healthcheck-pod-exec"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.healthcheck.metadata[0].name
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = kubernetes_role.healthcheck_exec.metadata[0].name
}
}
# --- CronJob: Task processor polls Forgejo issues and triggers OpenClaw ---
resource "kubernetes_cron_job_v1" "task_processor" {
metadata {
name = "task-processor"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "task-processor"
tier = local.tiers.aux
}
}
spec {
schedule = "*/5 * * * *"
concurrency_policy = "Forbid"
failed_jobs_history_limit = 3
successful_jobs_history_limit = 3
job_template {
metadata {
labels = {
app = "task-processor"
}
}
spec {
active_deadline_seconds = 600
backoff_limit = 0
ttl_seconds_after_finished = 86400
template {
metadata {
labels = {
app = "task-processor"
}
}
spec {
service_account_name = kubernetes_service_account.healthcheck.metadata[0].name
restart_policy = "Never"
container {
name = "task-processor"
image = "bitnami/kubectl:latest"
command = ["bash", "-c", <<-EOF
# Find the openclaw pod
POD=$(kubectl get pods -n openclaw -l app=openclaw -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
if [ -z "$POD" ]; then
echo "ERROR: OpenClaw pod not found"
exit 1
fi
echo "Executing task processor in pod $POD..."
kubectl exec -n openclaw "$POD" -c openclaw -- \
env FORGEJO_TOKEN="$FORGEJO_TOKEN" \
FORGEJO_URL="http://forgejo.forgejo.svc.cluster.local" \
OPENCLAW_TOKEN="$OPENCLAW_TOKEN" \
OPENCLAW_URL="https://integrate.api.nvidia.com" \
bash /workspace/infra/scripts/task-processor.sh
EOF
]
env {
name = "FORGEJO_TOKEN"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "forgejo_api_token"
}
}
}
env {
name = "OPENCLAW_TOKEN"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "nvidia_api_key"
}
}
}
resources {
requests = {
cpu = "50m"
memory = "64Mi"
}
limits = {
memory = "64Mi"
}
}
}
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
}
}
# --- CronJob: claude-memory → memory-core sync (daily) ---
# Pulls all (non-sensitive) memories from claude-memory's REST API and
# writes them into memory-core's QMD-backed tree at
# /home/node/.openclaw/memory/projects/claude-memory-sync/. Then runs
# `openclaw memory index --force` to rebuild the search index so the
# OpenClaw agent can `memory_search` over the shared knowledge.
#
# Note: the central claude-memory MCP transport (/mcp/mcp) is broken
# on the deployed image (beads code-z1so) — this REST sync is the
# workaround. Once that's fixed we can also wire claude_memory as a
# native MCP server in the mcp.servers block above.
resource "kubernetes_config_map" "memory_sync_script" {
metadata {
name = "memory-sync-script"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"memory-sync.py" = file("${path.module}/files/memory-sync.py")
}
}
resource "kubernetes_cron_job_v1" "memory_sync" {
metadata {
name = "memory-sync"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "memory-sync"
tier = local.tiers.aux
}
}
spec {
schedule = "0 3 * * *"
concurrency_policy = "Forbid"
failed_jobs_history_limit = 3
successful_jobs_history_limit = 3
job_template {
metadata {
labels = {
app = "memory-sync"
}
}
spec {
active_deadline_seconds = 600
backoff_limit = 0
ttl_seconds_after_finished = 86400
template {
metadata {
labels = {
app = "memory-sync"
}
}
spec {
# Reuses the SA created for the (decommissioned) cluster
# healthcheck job — already has pods + pods/exec in this ns.
service_account_name = kubernetes_service_account.healthcheck.metadata[0].name
restart_policy = "Never"
container {
name = "memory-sync"
image = "bitnami/kubectl:latest"
command = ["bash", "-c", <<-EOF
set -eu
POD=$(kubectl get pods -n openclaw -l app=openclaw -o jsonpath='{.items[0].metadata.name}')
if [ -z "$POD" ]; then
echo "ERROR: no openclaw pod"
exit 1
fi
echo "syncing into pod $POD ..."
kubectl exec -n openclaw "$POD" -c openclaw -i -- python3 -u - < /scripts/memory-sync.py
echo "reindexing memory-core ..."
kubectl exec -n openclaw "$POD" -c openclaw -- sh -c 'cd /app && node openclaw.mjs memory index --force 2>&1 | tail -20'
echo "memory-sync complete."
EOF
]
volume_mount {
name = "script"
mount_path = "/scripts"
read_only = true
}
resources {
requests = {
cpu = "20m"
memory = "64Mi"
}
limits = {
memory = "64Mi"
}
}
}
volume {
name = "script"
config_map {
name = kubernetes_config_map.memory_sync_script.metadata[0].name
}
}
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
}
}
# --- OpenLobster: Multi-user Telegram AI assistant (trial) ---
resource "kubernetes_persistent_volume_claim" "openlobster_data_proxmox" {
wait_until_bound = false
metadata {
name = "openlobster-data-proxmox"
namespace = kubernetes_namespace.openclaw.metadata[0].name
annotations = {
"resize.topolvm.io/threshold" = "10%"
"resize.topolvm.io/increase" = "100%"
"resize.topolvm.io/storage_limit" = "5Gi"
}
}
spec {
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm"
resources {
requests = {
storage = "1Gi"
}
}
}
lifecycle {
# The autoresizer expands requests.storage up to storage_limit and
# PVCs can't shrink. Without this, every TF apply tries to revert
# to the spec value, K8s rejects the shrink, and the PVC ends up
# in Terminating-but-in-use limbo.
ignore_changes = [spec[0].resources[0].requests]
}
}
resource "random_password" "openlobster_graphql_token" {
length = 32
special = false
}
resource "kubernetes_deployment" "openlobster" {
metadata {
name = "openlobster"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "openlobster"
tier = local.tiers.aux
}
}
spec {
strategy {
type = "Recreate"
}
replicas = 0
selector {
match_labels = {
app = "openlobster"
}
}
template {
metadata {
labels = {
app = "openlobster"
}
}
spec {
# node4 has corrupted containerd content store — avoid it
affinity {
node_affinity {
required_during_scheduling_ignored_during_execution {
node_selector_term {
match_expressions {
key = "kubernetes.io/hostname"
operator = "NotIn"
values = ["k8s-node4"]
}
}
}
}
}
container {
name = "openlobster"
image = "ghcr.io/neirth/openlobster/openlobster:latest"
port {
container_port = 8080
}
env {
name = "OPENLOBSTER_GRAPHQL_AUTH_TOKEN"
value = random_password.openlobster_graphql_token.result
}
env {
name = "OPENLOBSTER_PROVIDERS_ANTHROPIC_API_KEY"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "anthropic_api_key"
}
}
}
env {
name = "OPENLOBSTER_PROVIDERS_ANTHROPIC_MODEL"
value = "claude-sonnet-4-20250514"
}
env {
name = "OPENLOBSTER_CHANNELS_TELEGRAM_TOKEN"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "telegram_bot_token"
}
}
}
env {
name = "OPENLOBSTER_DATABASE_DRIVER"
value = "sqlite"
}
env {
name = "OPENLOBSTER_DATABASE_DSN"
value = "/app/data/openlobster.db"
}
env {
name = "OPENLOBSTER_AGENT_NAME"
value = "Lobster"
}
env {
name = "OPENLOBSTER_MEMORY_BACKEND"
value = "file"
}
volume_mount {
name = "openlobster-data"
mount_path = "/app/data"
}
resources {
requests = {
cpu = "10m"
memory = "64Mi"
}
limits = {
memory = "256Mi"
}
}
}
volume {
name = "openlobster-data"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.openlobster_data_proxmox.metadata[0].name
}
}
}
}
}
lifecycle {
ignore_changes = [
spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
metadata[0].annotations["keel.sh/policy"],
metadata[0].annotations["keel.sh/trigger"],
metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
]
}
}
resource "kubernetes_service" "openlobster" {
metadata {
name = "openlobster"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "openlobster"
}
}
spec {
selector = {
app = "openlobster"
}
port {
port = 80
target_port = 8080
}
}
}
module "openlobster_ingress" {
source = "../../modules/kubernetes/ingress_factory"
dns_type = "proxied"
namespace = kubernetes_namespace.openclaw.metadata[0].name
name = "openlobster"
tls_secret_name = var.tls_secret_name
host = "openlobster"
port = 80
auth = "required"
}