infra/stacks/openclaw/main.tf
Viktor Barzin 60d8d54d6e openclaw: v3 flow — know → ask devvm → (rarely) try yourself
Refines the devvm-fallback into an explicit triage flow that the
agent runs on every task. The default path is to ASK devvm-claude
when uncertain — don't brute-force. Most tasks are solvable there.

## The flow

1. Do I KNOW how? Check `memory_recall` and INDEX.md.
2. If not, SSH devvm and ask claude — and crucially, ask it to
   share the steps + credentials needed so I can do it on my own
   next time. Save the answer in openclaw memory.
3. (RARE) If devvm-claude says no, try in-pod. Most likely fail —
   that's OK.

## Storage moved to memory-indexed location

Learnings now live under
`/workspace/memory/projects/openclaw-learned/` (was
`/workspace/learned/`) so memory-core indexes them and
`memory_recall` surfaces them. Layout:

- `scripts/<task>.md`       runnable recipes
- `knowledge/<topic>.md`    decisions, paths, gotchas
- `credentials/<name>.md`   **POINTERS to Vault, never values**

## Credentials = Vault pointers only

Previous v2 design saved cred values to plaintext NFS files. v3
flips to pointer-only: cred file documents the Vault path + fetch
command (`ssh devvm 'vault kv get -field=foo secret/bar'`), the
consumer, and rotation expectations. The secret stays in Vault.

## Init container also migrates

Strips v1/v2/v3 markers from TOOLS.md before re-inserting v3,
moves any files from the legacy `/workspace/learned/` tree into
the new location, removes the empty legacy dir. User edits
outside the markers always survive.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 11:20:54 +00:00

2050 lines
74 KiB
HCL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

variable "tls_secret_name" {
type = string
sensitive = true
}
variable "nfs_server" { type = string }
data "vault_kv_secret_v2" "secrets" {
mount = "secret"
name = "openclaw"
}
locals {
skill_secrets = jsondecode(data.vault_kv_secret_v2.secrets.data["skill_secrets"])
}
resource "kubernetes_namespace" "openclaw" {
metadata {
name = "openclaw"
labels = {
tier = local.tiers.aux
"resource-governance/custom-limitrange" = "true"
"resource-governance/custom-quota" = "true"
"keel.sh/enrolled" = "true"
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
}
}
module "tls_secret" {
source = "../../modules/kubernetes/setup_tls_secret"
namespace = kubernetes_namespace.openclaw.metadata[0].name
tls_secret_name = var.tls_secret_name
}
resource "kubernetes_manifest" "external_secret" {
manifest = {
apiVersion = "external-secrets.io/v1beta1"
kind = "ExternalSecret"
metadata = {
name = "openclaw-secrets"
namespace = "openclaw"
}
spec = {
refreshInterval = "15m"
secretStoreRef = {
name = "vault-kv"
kind = "ClusterSecretStore"
}
target = {
name = "openclaw-secrets"
}
dataFrom = [{
extract = {
key = "openclaw"
}
}]
}
}
depends_on = [kubernetes_namespace.openclaw]
}
resource "kubernetes_service_account" "openclaw" {
metadata {
name = "openclaw"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
}
resource "kubernetes_cluster_role_binding" "openclaw" {
metadata {
name = "openclaw-cluster-admin"
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.openclaw.metadata[0].name
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "cluster-admin"
}
}
resource "kubernetes_secret" "ssh_key" {
metadata {
name = "ssh-key"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"id_rsa" = data.vault_kv_secret_v2.secrets.data["ssh_key"]
}
type = "generic"
}
resource "kubernetes_config_map" "git_crypt_key" {
metadata {
name = "git-crypt-key"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"key" = filebase64("${path.root}/../../.git/git-crypt/keys/default")
}
}
resource "kubernetes_config_map" "openclaw_config" {
metadata {
name = "openclaw-config"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"openclaw.json" = jsonencode({
gateway = {
mode = "local"
bind = "lan"
trustedProxies = ["10.0.0.0/8"]
controlUi = {
dangerouslyDisableDeviceAuth = true
dangerouslyAllowHostHeaderOriginFallback = true
}
}
agents = {
defaults = {
contextTokens = 1000000
bootstrapMaxChars = 30000
workspace = "/workspace"
sandbox = {
mode = "off"
}
model = {
# ChatGPT Plus OAuth via openai-codex plugin (account: ancaelena98@gmail.com).
# gpt-5.4-mini is the only mini variant the Codex backend accepts for Plus tier;
# gpt-5-mini / gpt-5.1-codex-mini return model_not_found / "not supported with
# ChatGPT account". Plus rate-card: 1,2007,000 local msgs / 5h on gpt-5.4-mini.
primary = "openai-codex/gpt-5.4-mini"
fallbacks = ["openai-codex/gpt-5.5", "nim/qwen/qwen3-coder-480b-a35b-instruct", "modelrelay/auto-fastest"]
}
models = {
"modelrelay/auto-fastest" = {}
"nim/deepseek-ai/deepseek-v3.2" = {}
"nim/qwen/qwen3.5-397b-a17b" = {}
"nim/mistralai/mistral-large-3-675b-instruct-2512" = {}
"nim/qwen/qwen3-coder-480b-a35b-instruct" = {}
"nim/nvidia/llama-3.1-nemotron-ultra-253b-v1" = {}
"nim/z-ai/glm5" = {}
"llama-as-openai/Llama-4-Maverick-17B-128E-Instruct-FP8" = {}
"llama-as-openai/Llama-4-Scout-17B-16E-Instruct-FP8" = {}
"openrouter/stepfun/step-3.5-flash:free" = {}
"openrouter/arcee-ai/trinity-large-preview:free" = {}
"openai-codex/gpt-5.4-mini" = {}
"openai-codex/gpt-5.5" = {}
}
}
}
tools = {
profile = "full"
deny = []
elevated = {
enabled = true
}
exec = {
host = "gateway"
security = "full"
ask = "off"
pathPrepend = ["/tools", "/workspace"]
}
web = {
search = {
enabled = true
provider = "brave"
apiKey = data.vault_kv_secret_v2.secrets.data["brave_api_key"]
maxResults = 5
}
fetch = {
enabled = true
maxChars = 50000
timeoutSeconds = 30
}
}
}
plugins = {
allow = ["memory-core", "recruiter-api"]
slots = { memory = "memory-core" }
load = {
# /app/extensions is the legacy bundled-plugins path; OpenClaw
# already loads bundled plugins natively (doctor warning).
paths = ["/home/node/.openclaw/extensions"]
}
}
# Note: mcp.servers is configured via `openclaw mcp set` in the main
# container startup command (see below) rather than in this ConfigMap.
# OpenClaw's `doctor --fix` (which runs on every pod start) strips
# bulk-loaded mcp blocks from openclaw.json, but preserves CLI-set
# entries. The CLI is the canonical writer.
commands = {
native = true
nativeSkills = true
}
channels = {
telegram = {
enabled = true
botToken = data.vault_kv_secret_v2.secrets.data["telegram_bot_token"]
dmPolicy = "allowlist"
allowFrom = ["tg:8281953845"]
groupPolicy = "allowlist"
streamMode = "partial"
}
}
models = {
mode = "merge"
providers = {
modelrelay = {
baseUrl = "http://127.0.0.1:7352/v1"
api = "openai-completions"
apiKey = "modelrelay"
models = [
{ id = "auto-fastest", name = "Auto (Fastest)", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
nim = {
baseUrl = "https://integrate.api.nvidia.com/v1"
api = "openai-completions"
apiKey = data.vault_kv_secret_v2.secrets.data["nvidia_api_key"]
models = [
{ id = "deepseek-ai/deepseek-v3.2", name = "DeepSeek V3.2", reasoning = false, input = ["text"], contextWindow = 164000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "qwen/qwen3.5-397b-a17b", name = "Qwen 3.5", reasoning = true, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "mistralai/mistral-large-3-675b-instruct-2512", name = "Mistral Large 3", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "qwen/qwen3-coder-480b-a35b-instruct", name = "Qwen 3 Coder", reasoning = false, input = ["text"], contextWindow = 262000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "nvidia/llama-3.1-nemotron-ultra-253b-v1", name = "Nemotron Ultra 253B", reasoning = true, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "z-ai/glm5", name = "GLM-5", reasoning = false, input = ["text"], contextWindow = 128000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
openrouter = {
baseUrl = "https://openrouter.ai/api/v1"
api = "openai-completions"
apiKey = data.vault_kv_secret_v2.secrets.data["openrouter_api_key"]
models = [
{ id = "stepfun/step-3.5-flash:free", name = "Step 3.5 Flash", reasoning = true, input = ["text"], contextWindow = 256000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "arcee-ai/trinity-large-preview:free", name = "Trinity Large", reasoning = false, input = ["text"], contextWindow = 131000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
llama-as-openai = {
baseUrl = "https://api.llama.com/compat/v1"
apiKey = data.vault_kv_secret_v2.secrets.data["llama_api_key"]
api = "openai-completions"
models = [
{ id = "Llama-4-Maverick-17B-128E-Instruct-FP8", name = "Llama 4 Maverick", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
{ id = "Llama-4-Scout-17B-16E-Instruct-FP8", name = "Llama 4 Scout", reasoning = false, input = ["text"], contextWindow = 200000, maxTokens = 16384, cost = { input = 0, output = 0, cacheRead = 0, cacheWrite = 0 } },
]
}
}
}
wizard = {
lastRunAt = "2026-03-01T15:11:54.176Z"
lastRunVersion = "2026.2.9"
lastRunCommand = "configure"
lastRunMode = "local"
}
})
}
}
resource "random_password" "gateway_token" {
length = 32
special = false
}
# Prometheus exporter script — read by the openclaw-exporter sidecar.
# Stdlib-only Python so no pip install at startup. Reads sessions JSONL +
# auth-profiles.json from the NFS-backed openclaw home volume (mounted ro).
resource "kubernetes_config_map" "openclaw_exporter" {
metadata {
name = "openclaw-exporter"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"exporter.py" = file("${path.module}/files/exporter.py")
}
}
module "nfs_tools_host" {
source = "../../modules/kubernetes/nfs_volume"
name = "openclaw-tools-host"
namespace = kubernetes_namespace.openclaw.metadata[0].name
nfs_server = "192.168.1.127"
nfs_path = "/srv/nfs/openclaw/tools"
}
resource "kubernetes_persistent_volume_claim" "home_proxmox" {
wait_until_bound = false
metadata {
name = "openclaw-home-proxmox"
namespace = kubernetes_namespace.openclaw.metadata[0].name
annotations = {
"resize.topolvm.io/threshold" = "10%"
"resize.topolvm.io/increase" = "100%"
"resize.topolvm.io/storage_limit" = "5Gi"
}
}
spec {
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm"
resources {
requests = {
storage = "1Gi"
}
}
}
lifecycle {
# The autoresizer expands requests.storage up to storage_limit and
# PVCs can't shrink. Without this, every TF apply tries to revert
# to the spec value, K8s rejects the shrink, and the PVC ends up
# in Terminating-but-in-use limbo.
ignore_changes = [spec[0].resources[0].requests]
}
}
module "nfs_workspace_host" {
source = "../../modules/kubernetes/nfs_volume"
name = "openclaw-workspace-host"
namespace = kubernetes_namespace.openclaw.metadata[0].name
nfs_server = "192.168.1.127"
nfs_path = "/srv/nfs/openclaw/workspace"
}
resource "kubernetes_persistent_volume_claim" "data_proxmox" {
wait_until_bound = false
metadata {
name = "openclaw-data-proxmox"
namespace = kubernetes_namespace.openclaw.metadata[0].name
annotations = {
"resize.topolvm.io/threshold" = "10%"
"resize.topolvm.io/increase" = "100%"
"resize.topolvm.io/storage_limit" = "5Gi"
}
}
spec {
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm"
resources {
requests = {
storage = "1Gi"
}
}
}
lifecycle {
# The autoresizer expands requests.storage up to storage_limit and
# PVCs can't shrink. Without this, every TF apply tries to revert
# to the spec value, K8s rejects the shrink, and the PVC ends up
# in Terminating-but-in-use limbo.
ignore_changes = [spec[0].resources[0].requests]
}
}
## cc-config NFS volume removed — replaced by dotfiles repo clone in init container
## See init_container "install-dotfiles" in the deployment
resource "kubernetes_deployment" "openclaw" {
metadata {
name = "openclaw"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "openclaw"
tier = local.tiers.aux
}
}
spec {
strategy {
type = "Recreate"
}
replicas = 1
selector {
match_labels = {
app = "openclaw"
}
}
template {
metadata {
labels = {
app = "openclaw"
}
annotations = {
"reloader.stakater.com/search" = "true"
# Prometheus auto-discovers pods with these annotations.
# Scraped by the openclaw-exporter sidecar — exposes /metrics on :9099.
"prometheus.io/scrape" = "true"
"prometheus.io/port" = "9099"
"prometheus.io/path" = "/metrics"
}
}
spec {
service_account_name = kubernetes_service_account.openclaw.metadata[0].name
# Init 0: fix /workspace ownership so node user can write
init_container {
name = "fix-workspace-perms"
image = "busybox:1.37"
command = ["sh", "-c", "chown 1000:1000 /workspace"]
volume_mount {
name = "workspace"
mount_path = "/workspace"
}
}
# Init 1: copy openclaw.json from ConfigMap into writable NFS home
init_container {
name = "copy-config"
image = "busybox:1.37"
command = ["sh", "-c", "cp /config/openclaw.json /home/node/.openclaw/openclaw.json && chown 1000:1000 /home/node/.openclaw/openclaw.json"]
volume_mount {
name = "openclaw-config"
mount_path = "/config"
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
}
# Init 1b: regenerate kubeconfig pointing at the projected SA tokenFile
# so kubectl always reads the fresh, kubelet-rotated token. Without
# this the previously-baked kubeconfig retains a SA token bound to a
# long-dead pod and kubectl returns "must be logged in to the server".
init_container {
name = "setup-kubeconfig"
image = "busybox:1.37"
command = ["sh", "-c", <<-EOT
cat > /home/node/.openclaw/kubeconfig <<'KUBECONFIG_EOF'
apiVersion: v1
kind: Config
clusters:
- cluster:
certificate-authority: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
server: https://kubernetes.default.svc
name: in-cluster
contexts:
- context:
cluster: in-cluster
user: openclaw
namespace: openclaw
name: in-cluster
current-context: in-cluster
users:
- name: openclaw
user:
tokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
KUBECONFIG_EOF
chown 1000:1000 /home/node/.openclaw/kubeconfig
chmod 0644 /home/node/.openclaw/kubeconfig
EOT
]
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
}
# Init 2 removed: install-dotfiles init container was cloning dotfiles
# repo via git on every pod start, causing 200+ small NFS writes.
# Dotfiles already exist on NFS at /home/node/.openclaw/dotfiles from
# a previous clone. To update, run git pull manually or via CronJob.
# Init 3: install the recruiter-api OpenClaw plugin from the
# recruiter-responder image into NFS extensions/. Plugin lifecycle
# is coupled to the recruiter-responder image tag — bumping that
# tag re-installs the plugin on next openclaw pod restart.
init_container {
name = "install-recruiter-plugin"
image = "forgejo.viktorbarzin.me/viktor/recruiter-responder:latest"
command = ["sh", "-c", <<-EOT
set -eu
mkdir -p /home/node/.openclaw/extensions/recruiter-api
cp -r /app/openclaw-plugin/. /home/node/.openclaw/extensions/recruiter-api/
chown -R 1000:1000 /home/node/.openclaw/extensions/recruiter-api
echo "recruiter-api plugin installed at /home/node/.openclaw/extensions/recruiter-api"
ls -la /home/node/.openclaw/extensions/recruiter-api
EOT
]
# /home/node/.openclaw is uid 1000 on NFS; recruiter-responder image
# otherwise drops to uid 10001 which can't write or chown. Run as
# root so mkdir + chown succeed.
security_context {
run_as_user = 0
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
resources {
requests = { cpu = "50m", memory = "64Mi" }
limits = { memory = "128Mi" }
}
}
# Init 4: install host-tools bundle (ssh, vault, jq, ripgrep, tmux, …)
# into /tools/host-tools/ so the in-pod agent reaches CLI parity
# with the dev VM. Upstream OpenClaw image is minimal Debian
# bookworm running as uid 1000 — can't apt-install at runtime.
# Idempotent via marker file; bump suffix to force reinstall.
# See docs/plans/2026-05-22-openclaw-devvm-access-design.md.
init_container {
name = "install-host-tools"
image = "debian:bookworm-slim"
command = ["bash", "-c", <<-EOT
set -euo pipefail
DEST=/tools/host-tools
MARKER="$DEST/.installed-v1"
if [ -f "$MARKER" ]; then
echo "host-tools v1 already installed (skipping)"
exit 0
fi
echo "installing host-tools v1 ..."
rm -rf "$DEST"
mkdir -p "$DEST/root" "$DEST/bin"
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
# debian:bookworm-slim doesn't ship wget/unzip; install
# transiently into this init container's filesystem so we
# can download the static binaries below.
apt-get install -y --no-install-recommends wget unzip ca-certificates
# NOTE: we deliberately do NOT pass --no-install-recommends to
# the download step. ssh links against libgssapi-krb5-2 which
# is a hard Depends but its transitive deps (libkrb5-3 etc.)
# need to come along too. The bundle is a self-contained
# /usr-like tree that the openclaw container can use via
# LD_LIBRARY_PATH, so missing deps = broken binaries.
APT_PKGS="openssh-client dnsutils iputils-ping wget gnupg jq ripgrep fd-find ncdu htop strace tcpdump tmux unzip ca-certificates"
apt-get install -y --download-only $APT_PKGS
for d in /var/cache/apt/archives/*.deb; do
dpkg-deb -x "$d" "$DEST/root/"
done
VAULT_VER=1.18.3
YQ_VER=v4.44.3
wget -qO /tmp/vault.zip \
"https://releases.hashicorp.com/vault/$${VAULT_VER}/vault_$${VAULT_VER}_linux_amd64.zip"
unzip -o /tmp/vault.zip vault -d "$DEST/bin/"
chmod +x "$DEST/bin/vault"
wget -qO "$DEST/bin/yq" \
"https://github.com/mikefarah/yq/releases/download/$${YQ_VER}/yq_linux_amd64"
chmod +x "$DEST/bin/yq"
# Smoke test fail init if any bundled binary has unresolved
# shared-lib deps, so glibc / shared-lib drift surfaces at
# deploy time. We don't run --version because flag support
# varies (older scp returns non-zero, ping/nslookup use weird
# conventions). ldd is the reliable signal: if any "not
# found" appears, the binary won't load when called.
# LD_LIBRARY_PATH points ld.so at the bundled libs (the
# openclaw main container sets the same env).
export PATH="$DEST/root/usr/bin:$DEST/root/usr/sbin:$DEST/root/bin:$DEST/root/sbin:$DEST/bin:$PATH"
export LD_LIBRARY_PATH="$DEST/root/usr/lib/x86_64-linux-gnu:$DEST/root/lib/x86_64-linux-gnu"
for t in ssh scp ssh-keyscan dig host nslookup ping wget gpg jq rg fdfind tmux vault yq; do
bin=$(command -v "$t" 2>/dev/null) || { echo "FAIL: $t not on PATH"; exit 1; }
if ldd "$bin" 2>&1 | grep -q "not found"; then
echo "FAIL: $t has unresolved shared libs:"
ldd "$bin"
exit 1
fi
echo "OK: $t"
done
chown -R 1000:1000 "$DEST"
touch "$MARKER"
echo "host-tools v1 install complete ($(du -sh "$DEST" | cut -f1))"
EOT
]
volume_mount {
name = "tools"
mount_path = "/tools"
}
resources {
requests = { cpu = "100m", memory = "256Mi" }
limits = { memory = "512Mi" }
}
}
# Init 5: write /home/node/.openclaw/.ssh/{id_rsa,config,known_hosts}
# so the agent can `ssh devvm` without device-trust prompts. The
# main container symlinks /home/node/.ssh here at startup so
# the ssh client picks it up via $HOME/.ssh. Installs
# openssh-client transiently into this init container so
# ssh-keyscan works without LD_LIBRARY_PATH gymnastics.
init_container {
name = "setup-ssh-config"
image = "debian:bookworm-slim"
command = ["bash", "-c", <<-EOT
set -euo pipefail
SSH=/home/node/.openclaw/.ssh
MARKER="$SSH/.configured-v1"
if [ -f "$MARKER" ]; then
echo "ssh-config v1 already set up (skipping)"
exit 0
fi
echo "installing openssh-client for ssh-keyscan ..."
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y --no-install-recommends openssh-client >/dev/null
echo "configuring ssh ..."
mkdir -p "$SSH"
# Copy the secret-mounted private key into ~/.ssh with 0600
# the secret's tmpfs mount has wider perms (1777 + symlinks)
# that openssh refuses.
cp /ssh/id_rsa "$SSH/id_rsa"
chmod 0600 "$SSH/id_rsa"
cat > "$SSH/config" <<'SSH_EOF'
Host devvm
HostName 10.0.10.10
User wizard
IdentityFile ~/.ssh/id_rsa
UserKnownHostsFile ~/.ssh/known_hosts
StrictHostKeyChecking yes
SSH_EOF
chmod 0600 "$SSH/config"
ssh-keyscan -H 10.0.10.10 > "$SSH/known_hosts" 2>/tmp/keyscan.err
if [ ! -s "$SSH/known_hosts" ]; then
echo "ssh-keyscan produced empty known_hosts; stderr:"
cat /tmp/keyscan.err
exit 1
fi
chmod 0644 "$SSH/known_hosts"
chown -R 1000:1000 "$SSH"
touch "$MARKER"
echo "ssh-config v1 set up"
EOT
]
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
volume_mount {
name = "ssh-key"
mount_path = "/ssh"
}
resources {
requests = { cpu = "50m", memory = "64Mi" }
limits = { memory = "256Mi" }
}
}
# Init 6: seed devvm-fallback knowledge into FOUR places so the
# OpenClaw agent uses devvm as its default fallback AND learns
# from it (saving scripts/knowledge locally so it grows
# independent over time):
#
# 1. /workspace/TOOLS.md read on every session per AGENTS.md.
# Emphatic "TRY DEVVM BEFORE GIVING UP" + learning protocol.
# 2. /workspace/memory/projects/openclaw-runtime/devvm-fallback.md
# indexed by memory-core (memory_recall surfaces this).
# 3. /workspace/learned/ durable cache of stuff the agent has
# figured out via devvm, so it doesn't re-ask next time.
# INDEX.md is the table of contents the agent checks FIRST.
# 4. /home/node/.openclaw/cc-skills/devvm-fallback/SKILL.md
# formal skill, so the agent has a named entry-point for
# "I'm stuck, what now".
# All sections are marker-delimited (v2) so user edits outside
# the markers survive future pod restarts.
init_container {
name = "seed-devvm-memory-note"
image = "busybox:1.37"
command = ["sh", "-c", <<-EOT
set -eu
# ---- 1. Memory note (memory-core searchable) ----------------
DIR=/workspace/memory/projects/openclaw-runtime
mkdir -p "$DIR"
cat > "$DIR/devvm-fallback.md" <<'NOTE_EOF'
# Devvm is the default path when uncertain
**The flow is: know ask devvm (rarely) try yourself.** Don't
brute-force. If you're not sure HOW to do the task, default to
asking claude on devvm before trying random things in-pod.
Devvm runs as wizard with full home-lab access (Vault token,
kubectl cluster-admin, git repos, Cloudflare CLI) and Claude
Code v2+ at /usr/local/bin/claude.
## One-shot
ssh devvm 'vault kv get -field=brave_api_key secret/openclaw'
ssh devvm 'claude -p "fix frigate's gpu crashloop"'
## Long-running async (tmux on devvm survives pod restart)
ssh devvm openclaw-task new my-task "claude -p --dangerously-skip-permissions 'do X'"
ssh devvm openclaw-task capture my-task
ssh devvm openclaw-task list / status / log / kill / purge
Save everything you learn into /workspace/memory/projects/
openclaw-learned/ so it's indexed by memory-core and you can
`memory_recall` it next time. For credentials, save a POINTER
to the Vault path, never the value. Full protocol in the
`learn-from-tasks` skill.
NOTE_EOF
chown -R 1000:1000 "$DIR"
# ---- 2. TOOLS.md devvm + learning section -------------------
TOOLS=/workspace/TOOLS.md
touch "$TOOLS"
# Strip prior versions (v1, v2, v3) before re-inserting v3.
# User edits outside the markers always survive.
awk '
$0 == "<!-- BEGIN openclaw-devvm-section v1 -->" { skip=1; next }
$0 == "<!-- END openclaw-devvm-section v1 -->" { skip=0; next }
$0 == "<!-- BEGIN openclaw-devvm-section v2 -->" { skip=1; next }
$0 == "<!-- END openclaw-devvm-section v2 -->" { skip=0; next }
$0 == "<!-- BEGIN openclaw-devvm-section v3 -->" { skip=1; next }
$0 == "<!-- END openclaw-devvm-section v3 -->" { skip=0; next }
!skip { print }
' "$TOOLS" > "$TOOLS.tmp"
cat >> "$TOOLS.tmp" <<'TOOLS_EOF'
<!-- BEGIN openclaw-devvm-section v3 -->
## THE FLOW when you get a task, run this triage every time
**Don't brute-force. Most tasks can be solved by claude on
devvm. Default to asking it before trying random things.**
```
new task 1. Do I KNOW how?
(memory_recall +
check INDEX.md)
yes no
Just do it.
2. Ask claude on devvm:
- can you do this?
- if yes, do it AND
share the steps and
any credentials
needed
yes no
Devvm-claude does it. 3. (RARE) Let me try
Save what it told me in-pod. Most likely
into openclaw memory I will fail; that's
(or a Vault pointer for OK that's the
credentials). point.
```
Step 1: **Know?** Try `memory_recall "<topic>"` and look at
`/workspace/memory/projects/openclaw-learned/INDEX.md`. If
you (or a past session) already figured this out, use it.
Step 2: **Ask devvm.** Default path when uncertain. Use this
exact prompt shape explicitly ask for the recipe:
ssh devvm 'claude -p --dangerously-skip-permissions \
"I need to do X (give me context). \
Can you do this? If yes: do it now, then tell me \
the steps you took, the commands I would run on my \
own next time, and what credentials/paths I need. \
I will save your answer so next time I don't ask."'
For multi-step work that takes >2 min, dispatch async:
ssh devvm openclaw-task new <id> "<command>"
ssh devvm openclaw-task capture <id>
**Then SAVE everything** to
`/workspace/memory/projects/openclaw-learned/<topic>.md` so
memory_recall surfaces it next time (see Step 1).
Step 3: **Try in-pod.** Only when devvm-claude says it can't
do the task either. This is rare. If you reach this step,
be honest with the user and if you DO find a solution,
save it under openclaw-learned/ so future you doesn't redo
the discovery.
## ALSO: after every task, introspect save a faster way
Especially early on, almost everything is a teaching
moment. After a non-trivial task whether via devvm or
self pause and ask:
- Likely to recur?
- Took >1 round-trip / detour / dig?
- Learned a non-obvious command, path, credential source,
or workflow?
If yes save it now (see Saving below). The 2 minutes you
spend now save 10 minutes every recurrence. **Bias
aggressively toward saving** while you're still learning
the environment.
## Saving where it goes
All learnings live under
`/workspace/memory/projects/openclaw-learned/` because that
path is indexed by memory-core (the same daily CronJob
that pulls from claude-memory). After dropping a file
there, `memory_recall "<keywords>"` will surface it.
File-naming conventions:
- **Script / recipe**
`openclaw-learned/scripts/<task>.md`
Inline a fenced code block with the command(s). Header
must include WHAT, WHEN learned, HOW (the devvm prompt
you used, or "self"), and SOURCE (if a credential was
involved, point to the Vault path).
- **Knowledge** (decisions, paths, gotchas)
`openclaw-learned/knowledge/<topic>.md`
- **Credential POINTER**
`openclaw-learned/credentials/<name>.md`
**NEVER stores the value.** Documents:
- Vault path + field
- The exact command to fetch (e.g.,
`ssh devvm 'vault kv get -field=foo secret/bar'`)
- What service/task uses it
- Rotation expectations
That way the secret stays in Vault, you stay safe, but
you skip the "how do I get this credential" rediscovery
dance.
Finally, add a row to
`/workspace/memory/projects/openclaw-learned/INDEX.md` so
the table-of-contents reflects the new entry.
## devvm wizard@10.0.10.10 (pre-wired, zero-config)
SSH key at ~/.ssh/id_rsa, host pre-trusted, `ssh devvm`
Just Works. No password prompts, no host-trust prompts.
<!-- END openclaw-devvm-section v3 -->
TOOLS_EOF
mv "$TOOLS.tmp" "$TOOLS"
chown 1000:1000 "$TOOLS"
# ---- 3. Memory-indexed learned/ scaffold --------------------
LEARNED=/workspace/memory/projects/openclaw-learned
mkdir -p "$LEARNED/scripts" "$LEARNED/knowledge" "$LEARNED/credentials"
chmod 0755 "$LEARNED/credentials" # pointers only, not secrets
if [ ! -f "$LEARNED/INDEX.md" ]; then
cat > "$LEARNED/INDEX.md" <<'INDEX_EOF'
# openclaw-learned index
Things I've figured out (via devvm-claude or self). Check
here FIRST `memory_recall "<topic>"` also surfaces these.
| Task | Type | Path | Source | Added |
|------|------|------|--------|-------|
| _example: post to slack_ | script | scripts/slack-post.md | devvm-claude | 2026-05-22 |
## Layout
- `scripts/<task>.md` runnable recipes
- `knowledge/<topic>.md` decisions, paths, gotchas
- `credentials/<name>.md` POINTERS to Vault, never values
## When you save something new
1. Drop the file in the right slot above.
2. Header: WHAT, WHEN learned, HOW (verbatim devvm prompt
or "self"), SOURCE (Vault path if a credential).
3. Add a row to this INDEX.
4. (Optional) `node /app/openclaw.mjs memory index --force`
to make it immediately searchable; the daily memory-sync
CronJob re-indexes anyway.
See the `learn-from-tasks` skill for full protocol.
INDEX_EOF
fi
chown -R 1000:1000 "$LEARNED"
# Migrate v2 scaffold at /workspace/learned/ into the new
# memory-indexed location. Only move actual content if
# the directory is still empty (no learnings saved yet),
# remove it so the agent isn't confused by two locations.
if [ -d /workspace/learned ]; then
for sub in scripts knowledge credentials; do
if [ -d "/workspace/learned/$sub" ]; then
for f in "/workspace/learned/$sub"/*; do
[ -e "$f" ] || continue
mv "$f" "$LEARNED/$sub/" 2>/dev/null || true
done
fi
done
# Pull INDEX.md rows over too (manual review leave both
# for now and let the agent clean up). Then remove v2 dir
# if empty.
rmdir /workspace/learned/scripts /workspace/learned/knowledge \
/workspace/learned/credentials /workspace/learned \
2>/dev/null || true
fi
# ---- 4. Skill at cc-skills/learn-from-tasks/ ----------------
SKILL_DIR=/home/node/.openclaw/cc-skills/learn-from-tasks
mkdir -p "$SKILL_DIR"
cat > "$SKILL_DIR/SKILL.md" <<'SKILL_EOF'
# learn-from-tasks
**The flow is: know ask devvm (rarely) try yourself.**
Don't brute-force. If you don't immediately know HOW to do
the task, default to asking claude on devvm. Most tasks
can be solved there. Over time, as your learnings
accumulate under `/workspace/memory/projects/openclaw-
learned/`, you'll grow independent. That's the goal
devvm is the teacher, you're the student.
## Triage every new task
1. **Do I know how?**
`memory_recall "<topic>"` and/or scan
`/workspace/memory/projects/openclaw-learned/INDEX.md`.
If yes just do it.
2. **If not, ASK DEVVM.** This is the default path.
ssh devvm 'claude -p --dangerously-skip-permissions \
"I need to do X. Can you do this? If yes: do it \
now, then tell me the steps you took, the \
commands I should run on my own next time, and \
what credentials/paths I need. I will save your \
answer so next time I don't ask."'
For multi-step work (>2 min), use async:
ssh devvm openclaw-task new <id> "<command>"
ssh devvm openclaw-task claude <id> "<prompt>"
ssh devvm openclaw-task capture / log / status
When devvm-claude responds, **save what it told you**
(see Save protocol).
3. **Only if devvm-claude can't either**, try in-pod. This
is rare. Be honest with the user about the uncertainty.
If you find a way, save it just like step 2.
## After-task introspect (every non-trivial task)
Whether you used devvm or self-figured, pause at the end
and ask:
- Likely to recur?
- Took >1 round-trip / dig / detour?
- Non-obvious command, path, credential, workflow?
If yes save now. **Bias aggressively toward saving
while you're still learning the environment.**
## Save protocol
Everything goes under
`/workspace/memory/projects/openclaw-learned/` because
memory-core indexes that tree (it's how `memory_recall`
surfaces results).
1. Pick the slot:
- Script / recipe `scripts/<task>.md` (fenced code
block; agent reads + runs from here)
- Knowledge `knowledge/<topic>.md`
- Credential `credentials/<name>.md`
**POINTER ONLY, never the value**. Document the
Vault path + the fetch command (e.g.,
`ssh devvm 'vault kv get -field=foo secret/bar'`),
the consuming service, and rotation expectations.
2. Header in the file: WHAT, WHEN learned, HOW (verbatim
devvm prompt, or "self"), SOURCE (Vault path if cred).
3. Add a row to INDEX.md.
4. Test the saved recipe end-to-end before considering
the task done.
## Never save
- Trivial one-liners that don't actually save time.
- Values of credentials (use the pointer pattern).
- Things that change every run (ephemeral pod names,
random tokens, timestamps).
SKILL_EOF
chown -R 1000:1000 "$SKILL_DIR"
echo "devvm-fallback + learning loop v3 seeded:"
echo " - memory note: $DIR/devvm-fallback.md"
echo " - TOOLS.md v3 (explicit 3-step flow, memory-indexed saves)"
echo " - openclaw-learned/ at $LEARNED"
echo " - skill: $SKILL_DIR/SKILL.md"
EOT
]
volume_mount {
name = "workspace"
mount_path = "/workspace"
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
resources {
requests = { cpu = "10m", memory = "32Mi" }
limits = { memory = "64Mi" }
}
}
# Main container: OpenClaw
container {
name = "openclaw"
image = "ghcr.io/openclaw/openclaw:2026.5.4"
# Startup sequence:
# 1. doctor --fix repair sessions/state (also resets some config)
# 2. models set pin gpt-5.4-mini (doctor auto-promotes to gpt-5-pro otherwise)
# 3. mcp set <name> register MCP servers via the CLI (the
# ConfigMap-baked mcp.servers block gets
# stripped by doctor --fix, but CLI-written
# entries persist). Values: ha URL from
# $HA_SOFIA_MCP_URL env (Vault-sourced),
# others hard-coded.
# 4. gateway exec into the gateway process
command = ["sh", "-c", <<-EOC
# Symlink /home/node/.ssh persistent .ssh so the ssh client
# finds id_rsa/config/known_hosts via $HOME/.ssh. HOME is
# /home/node (image overlay), .ssh files live on the PVC
# at /home/node/.openclaw/.ssh (set up by init 5).
ln -sfn /home/node/.openclaw/.ssh /home/node/.ssh
node openclaw.mjs doctor --fix 2>/dev/null
node openclaw.mjs models set openai-codex/gpt-5.4-mini 2>/dev/null
node openclaw.mjs mcp set ha "{\"url\":\"$HA_SOFIA_MCP_URL\",\"transport\":\"streamable-http\"}" 2>/dev/null
node openclaw.mjs mcp set context7 '{"command":"npx","args":["-y","@upstash/context7-mcp"]}' 2>/dev/null
node openclaw.mjs mcp set playwright '{"url":"http://localhost:3000/mcp","transport":"streamable-http"}' 2>/dev/null
# doctor --fix overwrites plugins.allow with its bundled-plugins
# list. Re-add our third-party plugin to the allow list via
# `config patch`, then enable it. (Same pattern as mcp set above.)
echo '{"plugins":{"allow":["memory-core","recruiter-api","telegram","openrouter","brave","openai","codex"]}}' \
| node openclaw.mjs config patch --stdin 2>/dev/null || true
node openclaw.mjs plugins enable recruiter-api 2>/dev/null || true
# Reindex memory-core so the seeded devvm-fallback note (and
# anything else dropped under /workspace/memory/) is searchable
# on first boot; daily memory-sync CronJob also keeps it indexed.
node openclaw.mjs memory index --force 2>/dev/null || true
exec node openclaw.mjs gateway --allow-unconfigured --bind lan
EOC
]
port {
container_port = 18789
}
readiness_probe {
tcp_socket {
port = 18789
}
initial_delay_seconds = 30
period_seconds = 10
}
env {
name = "NODE_OPTIONS"
value = "--max-old-space-size=1536"
}
env {
name = "OPENCLAW_GATEWAY_TOKEN"
value = random_password.gateway_token.result
}
env {
name = "PATH"
# Host-tools bundle (installed by init 4: install-host-tools)
# comes first so ssh/scp/dig/vault/jq/etc. resolve to the
# extracted Debian binaries + the static-binary downloads.
# /bin + /sbin are needed because iputils-ping installs ping
# under /bin (not /usr/bin) on Debian.
value = "/tools/host-tools/root/usr/bin:/tools/host-tools/root/usr/sbin:/tools/host-tools/root/bin:/tools/host-tools/root/sbin:/tools/host-tools/bin:/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
}
env {
# Point ld.so at the bundled libs so the host-tools binaries
# find their shared-lib deps (libgssapi_krb5, libkrb5, etc.).
# Both base images are bookworm so the libs match the
# openclaw image's libc/libssl no ABI conflicts expected.
name = "LD_LIBRARY_PATH"
value = "/tools/host-tools/root/usr/lib/x86_64-linux-gnu:/tools/host-tools/root/lib/x86_64-linux-gnu"
}
env {
name = "TF_VAR_prod"
value = "true"
}
env {
name = "KUBECONFIG"
value = "/home/node/.openclaw/kubeconfig"
}
env {
name = "GIT_CONFIG_GLOBAL"
value = "/home/node/.openclaw/.gitconfig"
}
# Skill secrets - Home Assistant
env {
name = "HOME_ASSISTANT_URL"
value = "https://ha-london.viktorbarzin.me"
}
env {
name = "HOME_ASSISTANT_TOKEN"
value = local.skill_secrets["home_assistant_token"]
}
env {
name = "HOME_ASSISTANT_SOFIA_URL"
value = "https://ha-sofia.viktorbarzin.me"
}
env {
name = "HOME_ASSISTANT_SOFIA_TOKEN"
value = local.skill_secrets["home_assistant_sofia_token"]
}
# MCP URL for ha-mcp add-on on ha-sofia (secret-path auth).
# Consumed in the startup command by `openclaw mcp set ha ...`.
env {
name = "HA_SOFIA_MCP_URL"
value = data.vault_kv_secret_v2.secrets.data["ha_sofia_mcp_url"]
}
# Skill secrets - Uptime Kuma
env {
name = "UPTIME_KUMA_PASSWORD"
value = local.skill_secrets["uptime_kuma_password"]
}
# Memory API
env {
name = "MEMORY_API_URL"
value = "http://claude-memory.claude-memory.svc.cluster.local"
}
env {
name = "MEMORY_API_KEY"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "claude_memory_api_key"
}
}
}
# Recruiter Responder API consumed by the recruiter-api plugin
# (mounted into /home/node/.openclaw/extensions/recruiter-api/ via
# the install-recruiter-plugin init container below).
env {
name = "RECRUITER_RESPONDER_URL"
value = "http://recruiter-responder.recruiter-responder.svc.cluster.local:8080"
}
env {
name = "RECRUITER_RESPONDER_TOKEN"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "recruiter_responder_bearer_token"
optional = true
}
}
}
# Telegram chat ID for the recruiter-api plugin's announcement loop.
env {
name = "VIKTOR_CHAT_ID"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "viktor_chat_id"
optional = true
}
}
}
# Python packages path for skills
env {
name = "PYTHONPATH"
value = "/tools/python-libs"
}
volume_mount {
name = "tools"
mount_path = "/tools"
}
volume_mount {
name = "workspace"
mount_path = "/workspace"
}
volume_mount {
name = "data"
mount_path = "/data"
}
volume_mount {
name = "ssh-key"
mount_path = "/ssh"
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
}
resources {
limits = {
memory = "2Gi"
}
requests = {
cpu = "100m"
memory = "2Gi"
}
}
}
# Sidecar: playwright-mcp headless browser for agents
container {
name = "playwright-mcp"
image = "docker.io/viktorbarzin/playwright-mcp:v1"
args = ["--headless", "--browser", "chromium", "--no-sandbox", "--port", "3000", "--host", "0.0.0.0"]
port {
container_port = 3000
}
resources {
requests = {
cpu = "50m"
memory = "256Mi"
}
limits = {
memory = "512Mi"
}
}
}
# Sidecar: openclaw-exporter Prometheus exporter for Codex/OAuth usage.
# Reads sessions JSONL files + auth-profiles.json, exposes /metrics on :9099.
# Stdlib-only Python; no pip install at startup.
container {
name = "openclaw-exporter"
image = "docker.io/library/python:3.12-slim"
command = ["python3", "/scripts/exporter.py"]
port {
container_port = 9099
name = "metrics"
}
env {
name = "OPENCLAW_HOME"
value = "/home/node/.openclaw"
}
env {
name = "METRICS_PORT"
value = "9099"
}
volume_mount {
name = "openclaw-exporter-script"
mount_path = "/scripts"
read_only = true
}
volume_mount {
name = "openclaw-home"
mount_path = "/home/node/.openclaw"
read_only = true
}
readiness_probe {
http_get {
path = "/healthz"
port = 9099
}
initial_delay_seconds = 5
period_seconds = 30
}
resources {
requests = {
cpu = "10m"
memory = "64Mi"
}
limits = {
memory = "128Mi"
}
}
}
# Sidecar: modelrelay auto-routes to fastest healthy free model
container {
name = "modelrelay"
image = "docker.io/library/node:22-alpine"
command = ["sh", "-c", <<-EOF
if [ ! -f /tools/modelrelay/node_modules/.package-lock.json ]; then
mkdir -p /tools/modelrelay
cd /tools/modelrelay
npm init -y > /dev/null 2>&1
npm install modelrelay > /dev/null 2>&1
fi
cd /tools/modelrelay
exec npx modelrelay --port 7352
EOF
]
port {
container_port = 7352
}
env {
name = "NVIDIA_API_KEY"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "nvidia_api_key"
}
}
}
env {
name = "OPENROUTER_API_KEY"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "openrouter_api_key"
}
}
}
volume_mount {
name = "tools"
mount_path = "/tools"
}
resources {
limits = {
memory = "256Mi"
}
requests = {
cpu = "25m"
memory = "128Mi"
}
}
}
volume {
name = "tools"
persistent_volume_claim {
claim_name = module.nfs_tools_host.claim_name
}
}
volume {
name = "openclaw-home"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.home_proxmox.metadata[0].name
}
}
volume {
name = "workspace"
persistent_volume_claim {
claim_name = module.nfs_workspace_host.claim_name
}
}
volume {
name = "data"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.data_proxmox.metadata[0].name
}
}
volume {
name = "ssh-key"
secret {
secret_name = kubernetes_secret.ssh_key.metadata[0].name
default_mode = "0600"
}
}
volume {
name = "openclaw-config"
config_map {
name = kubernetes_config_map.openclaw_config.metadata[0].name
}
}
volume {
name = "openclaw-exporter-script"
config_map {
name = kubernetes_config_map.openclaw_exporter.metadata[0].name
default_mode = "0555"
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].template[0].spec[0].dns_config]
}
}
resource "kubernetes_service" "openclaw" {
metadata {
name = "openclaw"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "openclaw"
}
}
spec {
selector = {
app = "openclaw"
}
port {
port = 80
target_port = 18789
}
}
}
module "ingress" {
source = "../../modules/kubernetes/ingress_factory"
dns_type = "non-proxied"
namespace = kubernetes_namespace.openclaw.metadata[0].name
name = "openclaw"
tls_secret_name = var.tls_secret_name
port = 80
auth = "required"
extra_annotations = {
"gethomepage.dev/enabled" = "true"
"gethomepage.dev/name" = "OpenClaw"
"gethomepage.dev/description" = "AI assistant"
"gethomepage.dev/icon" = "openai.png"
"gethomepage.dev/group" = "AI & Data"
"gethomepage.dev/pod-selector" = ""
}
}
# --- Webhook receiver: triggers task-processor Job on Forgejo issue events ---
resource "kubernetes_config_map" "task_webhook" {
metadata {
name = "task-webhook"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"server.py" = <<-PYEOF
from http.server import HTTPServer, BaseHTTPRequestHandler
import subprocess, time, json, os
BOT_USER = os.environ.get('FORGEJO_BOT_USER', 'viktor')
class Handler(BaseHTTPRequestHandler):
def do_POST(self):
try:
body = self.rfile.read(int(self.headers.get('Content-Length', 0)))
data = json.loads(body)
action = data.get('action', '')
# Trigger on: new issue, reopened issue, or new comment
trigger = False
if action in ('opened', 'reopened'):
issue = data.get('issue', {})
print(f"Issue #{issue.get('number','?')} {action}: {issue.get('title','?')}")
trigger = True
elif action == 'created' and 'comment' in data:
comment = data.get('comment', {})
commenter = comment.get('user', {}).get('login', '')
# Skip comments from the bot itself to avoid loops
if commenter != BOT_USER:
issue = data.get('issue', {})
print(f"Comment on #{issue.get('number','?')} by {commenter}")
trigger = True
else:
print(f"Skipping own comment on #{data.get('issue',{}).get('number','?')}")
if trigger:
job_name = f"task-processor-{int(time.time())}"
subprocess.run([
'kubectl', 'create', 'job', job_name,
'--from=cronjob/task-processor',
'-n', 'openclaw'
], check=True)
self.send_response(200)
self.end_headers()
self.wfile.write(b'{"ok":true}')
else:
self.send_response(200)
self.end_headers()
self.wfile.write(b'{"ok":true,"skipped":true}')
except Exception as e:
print(f"Error: {e}")
self.send_response(500)
self.end_headers()
self.wfile.write(f'{{"error":"{e}"}}'.encode())
def do_GET(self):
self.send_response(200)
self.end_headers()
self.wfile.write(b'{"status":"ok"}')
def log_message(self, fmt, *args):
print(f"[webhook] {args[0]} {args[1]} {args[2]}")
print("Task webhook receiver listening on :8080")
HTTPServer(('', 8080), Handler).serve_forever()
PYEOF
}
}
resource "kubernetes_service_account" "task_webhook" {
metadata {
name = "task-webhook"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
}
resource "kubernetes_role" "task_webhook" {
metadata {
name = "task-webhook-job-creator"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
rule {
api_groups = ["batch"]
resources = ["jobs", "cronjobs"]
verbs = ["get", "list", "create"]
}
}
resource "kubernetes_role_binding" "task_webhook" {
metadata {
name = "task-webhook-job-creator"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.task_webhook.metadata[0].name
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = kubernetes_role.task_webhook.metadata[0].name
}
}
resource "kubernetes_deployment" "task_webhook" {
metadata {
name = "task-webhook"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "task-webhook"
tier = local.tiers.aux
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "task-webhook"
}
}
template {
metadata {
labels = {
app = "task-webhook"
}
}
spec {
service_account_name = kubernetes_service_account.task_webhook.metadata[0].name
container {
name = "webhook"
image = "python:3-alpine"
command = ["sh", "-c", "apk add --no-cache curl > /dev/null 2>&1 && curl -sfL https://dl.k8s.io/release/v1.34.2/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl && exec python3 -u /app/server.py"]
port {
container_port = 8080
}
volume_mount {
name = "app"
mount_path = "/app"
}
resources {
requests = {
cpu = "5m"
memory = "64Mi"
}
limits = {
memory = "64Mi"
}
}
}
volume {
name = "app"
config_map {
name = kubernetes_config_map.task_webhook.metadata[0].name
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].template[0].spec[0].dns_config]
}
}
resource "kubernetes_service" "task_webhook" {
metadata {
name = "task-webhook"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "task-webhook"
}
}
spec {
selector = {
app = "task-webhook"
}
port {
port = 80
target_port = 8080
}
}
}
module "task_webhook_ingress" {
source = "../../modules/kubernetes/ingress_factory"
auth = "required"
namespace = kubernetes_namespace.openclaw.metadata[0].name
name = "task-webhook"
tls_secret_name = var.tls_secret_name
host = "task-webhook"
port = 80
external_monitor = false
}
# --- Shared ServiceAccount: grants pod-exec into the openclaw pod ---
# Used by the task_processor CronJob (below). Previously also used by the
# cluster_healthcheck CronJob, which has been decommissioned the local
# `scripts/cluster_healthcheck.sh` is now the single authoritative runner.
resource "kubernetes_service_account" "healthcheck" {
metadata {
name = "cluster-healthcheck"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
}
resource "kubernetes_role" "healthcheck_exec" {
metadata {
name = "healthcheck-pod-exec"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
rule {
api_groups = [""]
resources = ["pods"]
verbs = ["get", "list"]
}
rule {
api_groups = [""]
resources = ["pods/exec"]
verbs = ["create"]
}
}
resource "kubernetes_role_binding" "healthcheck_exec" {
metadata {
name = "healthcheck-pod-exec"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.healthcheck.metadata[0].name
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = kubernetes_role.healthcheck_exec.metadata[0].name
}
}
# --- CronJob: Task processor polls Forgejo issues and triggers OpenClaw ---
resource "kubernetes_cron_job_v1" "task_processor" {
metadata {
name = "task-processor"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "task-processor"
tier = local.tiers.aux
}
}
spec {
schedule = "*/5 * * * *"
concurrency_policy = "Forbid"
failed_jobs_history_limit = 3
successful_jobs_history_limit = 3
job_template {
metadata {
labels = {
app = "task-processor"
}
}
spec {
active_deadline_seconds = 600
backoff_limit = 0
ttl_seconds_after_finished = 86400
template {
metadata {
labels = {
app = "task-processor"
}
}
spec {
service_account_name = kubernetes_service_account.healthcheck.metadata[0].name
restart_policy = "Never"
container {
name = "task-processor"
image = "bitnami/kubectl:latest"
command = ["bash", "-c", <<-EOF
# Find the openclaw pod
POD=$(kubectl get pods -n openclaw -l app=openclaw -o jsonpath='{.items[0].metadata.name}' 2>/dev/null)
if [ -z "$POD" ]; then
echo "ERROR: OpenClaw pod not found"
exit 1
fi
echo "Executing task processor in pod $POD..."
kubectl exec -n openclaw "$POD" -c openclaw -- \
env FORGEJO_TOKEN="$FORGEJO_TOKEN" \
FORGEJO_URL="http://forgejo.forgejo.svc.cluster.local" \
OPENCLAW_TOKEN="$OPENCLAW_TOKEN" \
OPENCLAW_URL="https://integrate.api.nvidia.com" \
bash /workspace/infra/scripts/task-processor.sh
EOF
]
env {
name = "FORGEJO_TOKEN"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "forgejo_api_token"
}
}
}
env {
name = "OPENCLAW_TOKEN"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "nvidia_api_key"
}
}
}
resources {
requests = {
cpu = "50m"
memory = "64Mi"
}
limits = {
memory = "64Mi"
}
}
}
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
}
}
# --- CronJob: claude-memory memory-core sync (daily) ---
# Pulls all (non-sensitive) memories from claude-memory's REST API and
# writes them into memory-core's QMD-backed tree at
# /home/node/.openclaw/memory/projects/claude-memory-sync/. Then runs
# `openclaw memory index --force` to rebuild the search index so the
# OpenClaw agent can `memory_search` over the shared knowledge.
#
# Note: the central claude-memory MCP transport (/mcp/mcp) is broken
# on the deployed image (beads code-z1so) this REST sync is the
# workaround. Once that's fixed we can also wire claude_memory as a
# native MCP server in the mcp.servers block above.
resource "kubernetes_config_map" "memory_sync_script" {
metadata {
name = "memory-sync-script"
namespace = kubernetes_namespace.openclaw.metadata[0].name
}
data = {
"memory-sync.py" = file("${path.module}/files/memory-sync.py")
}
}
resource "kubernetes_cron_job_v1" "memory_sync" {
metadata {
name = "memory-sync"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "memory-sync"
tier = local.tiers.aux
}
}
spec {
schedule = "0 3 * * *"
concurrency_policy = "Forbid"
failed_jobs_history_limit = 3
successful_jobs_history_limit = 3
job_template {
metadata {
labels = {
app = "memory-sync"
}
}
spec {
active_deadline_seconds = 600
backoff_limit = 0
ttl_seconds_after_finished = 86400
template {
metadata {
labels = {
app = "memory-sync"
}
}
spec {
# Reuses the SA created for the (decommissioned) cluster
# healthcheck job already has pods + pods/exec in this ns.
service_account_name = kubernetes_service_account.healthcheck.metadata[0].name
restart_policy = "Never"
container {
name = "memory-sync"
image = "bitnami/kubectl:latest"
command = ["bash", "-c", <<-EOF
set -eu
POD=$(kubectl get pods -n openclaw -l app=openclaw -o jsonpath='{.items[0].metadata.name}')
if [ -z "$POD" ]; then
echo "ERROR: no openclaw pod"
exit 1
fi
echo "syncing into pod $POD ..."
kubectl exec -n openclaw "$POD" -c openclaw -i -- python3 -u - < /scripts/memory-sync.py
echo "reindexing memory-core ..."
kubectl exec -n openclaw "$POD" -c openclaw -- sh -c 'cd /app && node openclaw.mjs memory index --force 2>&1 | tail -20'
echo "memory-sync complete."
EOF
]
volume_mount {
name = "script"
mount_path = "/scripts"
read_only = true
}
resources {
requests = {
cpu = "20m"
memory = "64Mi"
}
limits = {
memory = "64Mi"
}
}
}
volume {
name = "script"
config_map {
name = kubernetes_config_map.memory_sync_script.metadata[0].name
}
}
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
}
}
# --- OpenLobster: Multi-user Telegram AI assistant (trial) ---
resource "kubernetes_persistent_volume_claim" "openlobster_data_proxmox" {
wait_until_bound = false
metadata {
name = "openlobster-data-proxmox"
namespace = kubernetes_namespace.openclaw.metadata[0].name
annotations = {
"resize.topolvm.io/threshold" = "10%"
"resize.topolvm.io/increase" = "100%"
"resize.topolvm.io/storage_limit" = "5Gi"
}
}
spec {
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm"
resources {
requests = {
storage = "1Gi"
}
}
}
lifecycle {
# The autoresizer expands requests.storage up to storage_limit and
# PVCs can't shrink. Without this, every TF apply tries to revert
# to the spec value, K8s rejects the shrink, and the PVC ends up
# in Terminating-but-in-use limbo.
ignore_changes = [spec[0].resources[0].requests]
}
}
resource "random_password" "openlobster_graphql_token" {
length = 32
special = false
}
resource "kubernetes_deployment" "openlobster" {
metadata {
name = "openlobster"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "openlobster"
tier = local.tiers.aux
}
}
spec {
strategy {
type = "Recreate"
}
replicas = 0
selector {
match_labels = {
app = "openlobster"
}
}
template {
metadata {
labels = {
app = "openlobster"
}
}
spec {
# node4 has corrupted containerd content store avoid it
affinity {
node_affinity {
required_during_scheduling_ignored_during_execution {
node_selector_term {
match_expressions {
key = "kubernetes.io/hostname"
operator = "NotIn"
values = ["k8s-node4"]
}
}
}
}
}
container {
name = "openlobster"
image = "ghcr.io/neirth/openlobster/openlobster:latest"
port {
container_port = 8080
}
env {
name = "OPENLOBSTER_GRAPHQL_AUTH_TOKEN"
value = random_password.openlobster_graphql_token.result
}
env {
name = "OPENLOBSTER_PROVIDERS_ANTHROPIC_API_KEY"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "anthropic_api_key"
}
}
}
env {
name = "OPENLOBSTER_PROVIDERS_ANTHROPIC_MODEL"
value = "claude-sonnet-4-20250514"
}
env {
name = "OPENLOBSTER_CHANNELS_TELEGRAM_TOKEN"
value_from {
secret_key_ref {
name = "openclaw-secrets"
key = "telegram_bot_token"
}
}
}
env {
name = "OPENLOBSTER_DATABASE_DRIVER"
value = "sqlite"
}
env {
name = "OPENLOBSTER_DATABASE_DSN"
value = "/app/data/openlobster.db"
}
env {
name = "OPENLOBSTER_AGENT_NAME"
value = "Lobster"
}
env {
name = "OPENLOBSTER_MEMORY_BACKEND"
value = "file"
}
volume_mount {
name = "openlobster-data"
mount_path = "/app/data"
}
resources {
requests = {
cpu = "10m"
memory = "64Mi"
}
limits = {
memory = "256Mi"
}
}
}
volume {
name = "openlobster-data"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.openlobster_data_proxmox.metadata[0].name
}
}
}
}
}
lifecycle {
ignore_changes = [
spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
metadata[0].annotations["keel.sh/policy"],
metadata[0].annotations["keel.sh/trigger"],
metadata[0].annotations["keel.sh/pollSchedule"], # KYVERNO_LIFECYCLE_V2
]
}
}
resource "kubernetes_service" "openlobster" {
metadata {
name = "openlobster"
namespace = kubernetes_namespace.openclaw.metadata[0].name
labels = {
app = "openlobster"
}
}
spec {
selector = {
app = "openlobster"
}
port {
port = 80
target_port = 8080
}
}
}
module "openlobster_ingress" {
source = "../../modules/kubernetes/ingress_factory"
dns_type = "proxied"
namespace = kubernetes_namespace.openclaw.metadata[0].name
name = "openlobster"
tls_secret_name = var.tls_secret_name
host = "openlobster"
port = 80
auth = "required"
}