recruiter-responder: deploy stack + llama-cpp qwen3-8b + openclaw plugin mount
Three coupled changes for the new recruiter-responder pipeline:
1. stacks/llama-cpp/: add qwen3-8b text-only model to llama-swap. Uses
unsloth/Qwen3-8B-GGUF Q4_K_M, 16k context, no mmproj. Refactored the
download Job script + cmd renderer to handle text_only=true (skip
mmproj download + --mmproj flag). The 3 existing vision models stay
on text_only=false; no behaviour change for them.
2. stacks/recruiter-responder/: new stack. Namespace, 2 ExternalSecrets
(app secrets from secret/recruiter-responder, DB creds from Vault DB
engine static-creds/pg-recruiter-responder), Deployment (replicas=1,
Recreate -- IMAP IDLE + APScheduler want single leader), Service
ClusterIP. Image: forgejo.viktorbarzin.me/viktor/recruiter-responder.
3. stacks/openclaw/: add init container `install-recruiter-plugin` that
uses the recruiter-responder image to copy the .mjs plugin into
/home/node/.openclaw/extensions/recruiter-api/ on NFS. Couples plugin
version to the recruiter-responder image tag. Also injects
RECRUITER_RESPONDER_URL + RECRUITER_RESPONDER_TOKEN env vars (token
from openclaw-secrets.recruiter_responder_bearer_token, optional).
Pre-apply checklist for recruiter-responder stack:
- Vault: seed secret/recruiter-responder with webhook_bearer_token,
imap_{me,spam}_{user,pass}, smtp_password, claude_agent_token,
task_webhook_token.
- Vault: add secret/openclaw.recruiter_responder_bearer_token (must hold the
same value as the webhook_bearer_token seeded in the previous step).
- dbaas: create DB recruiter_responder + role recruiter_responder,
and Vault DB-engine role static-creds/pg-recruiter-responder.
- Build + push image via Woodpecker (recruiter-responder repo CI).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
95b9f7bc89
commit
7e1580ba8c
4 changed files with 409 additions and 12 deletions
|
|
@ -17,6 +17,8 @@ locals {
|
|||
# snapshot_download with allow_patterns). Stable symlinks model.gguf /
|
||||
# mmproj.gguf are created after download so llama-swap config can be
|
||||
# filename-agnostic.
|
||||
# `text_only = true` skips mmproj download + --mmproj flag (text-only LLM).
|
||||
# Vision models keep `text_only = false` (default).
|
||||
models = {
|
||||
qwen3vl-8b = {
|
||||
hf_repo = "Qwen/Qwen3-VL-8B-Instruct-GGUF"
|
||||
|
|
@ -24,6 +26,7 @@ locals {
|
|||
mmproj_pattern = "*mmproj*.gguf"
|
||||
ctx_size = 3072
|
||||
gpu_layers = 99
|
||||
text_only = false
|
||||
}
|
||||
minicpm-v-4-5 = {
|
||||
hf_repo = "openbmb/MiniCPM-V-4_5-gguf"
|
||||
|
|
@ -31,6 +34,7 @@ locals {
|
|||
mmproj_pattern = "*mmproj*.gguf"
|
||||
ctx_size = 3072
|
||||
gpu_layers = 99
|
||||
text_only = false
|
||||
}
|
||||
qwen3vl-4b = {
|
||||
hf_repo = "Qwen/Qwen3-VL-4B-Instruct-GGUF"
|
||||
|
|
@ -38,6 +42,21 @@ locals {
|
|||
mmproj_pattern = "*mmproj*.gguf"
|
||||
ctx_size = 3072
|
||||
gpu_layers = 99
|
||||
text_only = false
|
||||
}
|
||||
# Text-only triage / drafting model for recruiter-responder.
|
||||
# Q4_K_M, ~4.7GB, 32k native context (capped at 16k here — plenty
|
||||
# for recruiter emails + extraction prompt + JSON output).
|
||||
# Unsloth's GGUF: well-maintained, includes Q4_K_M. Qwen3 is a
|
||||
# thinking-capable model; recruiter-responder disables thinking via
|
||||
# `enable_thinking=false` in the chat-template kwargs.
|
||||
qwen3-8b = {
|
||||
hf_repo = "unsloth/Qwen3-8B-GGUF"
|
||||
gguf_pattern = "*Q4_K_M*.gguf"
|
||||
mmproj_pattern = ""
|
||||
ctx_size = 16384
|
||||
gpu_layers = 99
|
||||
text_only = true
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -55,18 +74,20 @@ locals {
|
|||
|
||||
models = {
|
||||
for mid, cfg in local.models : mid => {
|
||||
cmd = join(" ", [
|
||||
cmd = join(" ", concat([
|
||||
"/app/llama-server",
|
||||
"--host 0.0.0.0",
|
||||
"--port $${PORT}",
|
||||
"-m /models/${mid}/model.gguf",
|
||||
], cfg.text_only ? [] : [
|
||||
"--mmproj /models/${mid}/mmproj.gguf",
|
||||
], [
|
||||
"-ngl ${cfg.gpu_layers}",
|
||||
"-c ${cfg.ctx_size}",
|
||||
"-np 1",
|
||||
"--jinja",
|
||||
"-fa on",
|
||||
])
|
||||
]))
|
||||
ttl = 600 # unload after 10 min idle
|
||||
checkEndpoint = "/health"
|
||||
}
|
||||
|
|
@ -133,11 +154,15 @@ resource "kubernetes_job_v1" "download_models" {
|
|||
for mid, cfg in models.items():
|
||||
local_dir = f"/models/{mid}"
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
print(f"==> downloading {mid} from {cfg['hf_repo']} -> {local_dir}", flush=True)
|
||||
text_only = cfg.get("text_only", False)
|
||||
patterns = [cfg["gguf_pattern"]]
|
||||
if not text_only and cfg.get("mmproj_pattern"):
|
||||
patterns.append(cfg["mmproj_pattern"])
|
||||
print(f"==> downloading {mid} from {cfg['hf_repo']} -> {local_dir} (text_only={text_only})", flush=True)
|
||||
snapshot_download(
|
||||
repo_id=cfg["hf_repo"],
|
||||
local_dir=local_dir,
|
||||
allow_patterns=[cfg["gguf_pattern"], cfg["mmproj_pattern"]],
|
||||
allow_patterns=patterns,
|
||||
token=os.environ.get("HF_TOKEN") or None,
|
||||
# Single-threaded download — multi-worker buffers
|
||||
# multi-GB chunks per worker and OOMs the Job at 2Gi.
|
||||
|
|
@ -146,17 +171,20 @@ resource "kubernetes_job_v1" "download_models" {
|
|||
# Resolve actual filenames and create stable symlinks so
|
||||
# llama-swap config is filename-agnostic.
|
||||
ggufs = [p for p in glob.glob(f"{local_dir}/*Q4_K_M*.gguf") if "mmproj" not in p.lower()]
|
||||
mmprojs = glob.glob(f"{local_dir}/*mmproj*.gguf")
|
||||
if not ggufs:
|
||||
raise SystemExit(f"no GGUF found in {local_dir}")
|
||||
if not mmprojs:
|
||||
raise SystemExit(f"no mmproj found in {local_dir}")
|
||||
gguf_link = f"{local_dir}/model.gguf"
|
||||
mmproj_link = f"{local_dir}/mmproj.gguf"
|
||||
for link, target in ((gguf_link, ggufs[0]), (mmproj_link, mmprojs[0])):
|
||||
if os.path.islink(link) or os.path.exists(link):
|
||||
os.unlink(link)
|
||||
os.symlink(os.path.basename(target), link)
|
||||
if os.path.islink(gguf_link) or os.path.exists(gguf_link):
|
||||
os.unlink(gguf_link)
|
||||
os.symlink(os.path.basename(ggufs[0]), gguf_link)
|
||||
if not text_only:
|
||||
mmprojs = glob.glob(f"{local_dir}/*mmproj*.gguf")
|
||||
if not mmprojs:
|
||||
raise SystemExit(f"no mmproj found in {local_dir}")
|
||||
mmproj_link = f"{local_dir}/mmproj.gguf"
|
||||
if os.path.islink(mmproj_link) or os.path.exists(mmproj_link):
|
||||
os.unlink(mmproj_link)
|
||||
os.symlink(os.path.basename(mmprojs[0]), mmproj_link)
|
||||
print(f"==> done {mid}", flush=True)
|
||||
for f in sorted(os.listdir(local_dir)):
|
||||
full = os.path.join(local_dir, f)
|
||||
|
|
|
|||
|
|
@ -456,6 +456,32 @@ resource "kubernetes_deployment" "openclaw" {
|
|||
# Dotfiles already exist on NFS at /home/node/.openclaw/dotfiles from
|
||||
# a previous clone. To update, run git pull manually or via CronJob.
|
||||
|
||||
# Init 3: install the recruiter-api OpenClaw plugin from the
|
||||
# recruiter-responder image into NFS extensions/. Plugin lifecycle
|
||||
# is coupled to the recruiter-responder image tag — bumping that
|
||||
# tag re-installs the plugin on next openclaw pod restart.
|
||||
init_container {
|
||||
name = "install-recruiter-plugin"
|
||||
image = "forgejo.viktorbarzin.me/viktor/recruiter-responder:latest"
|
||||
command = ["sh", "-c", <<-EOT
|
||||
set -eu
|
||||
mkdir -p /home/node/.openclaw/extensions/recruiter-api
|
||||
cp -r /app/openclaw-plugin/. /home/node/.openclaw/extensions/recruiter-api/
|
||||
chown -R 1000:1000 /home/node/.openclaw/extensions/recruiter-api
|
||||
echo "recruiter-api plugin installed at /home/node/.openclaw/extensions/recruiter-api"
|
||||
ls -la /home/node/.openclaw/extensions/recruiter-api
|
||||
EOT
|
||||
]
|
||||
volume_mount {
|
||||
name = "openclaw-home"
|
||||
mount_path = "/home/node/.openclaw"
|
||||
}
|
||||
resources {
|
||||
requests = { cpu = "50m", memory = "64Mi" }
|
||||
limits = { memory = "128Mi" }
|
||||
}
|
||||
}
|
||||
|
||||
# Main container: OpenClaw
|
||||
container {
|
||||
name = "openclaw"
|
||||
|
|
@ -533,6 +559,23 @@ resource "kubernetes_deployment" "openclaw" {
|
|||
}
|
||||
}
|
||||
}
|
||||
# Recruiter Responder API — consumed by the recruiter-api plugin
|
||||
# (mounted into /home/node/.openclaw/extensions/recruiter-api/ via
|
||||
# the install-recruiter-plugin init container above).
|
||||
env {
|
||||
name = "RECRUITER_RESPONDER_URL"
|
||||
value = "http://recruiter-responder.recruiter-responder.svc.cluster.local:8080"
|
||||
}
|
||||
env {
|
||||
name = "RECRUITER_RESPONDER_TOKEN"
|
||||
value_from {
|
||||
secret_key_ref {
|
||||
name = "openclaw-secrets"
|
||||
key = "recruiter_responder_bearer_token"
|
||||
optional = true
|
||||
}
|
||||
}
|
||||
}
|
||||
# Python packages path for skills
|
||||
env {
|
||||
name = "PYTHONPATH"
|
||||
|
|
|
|||
302
stacks/recruiter-responder/main.tf
Normal file
302
stacks/recruiter-responder/main.tf
Normal file
|
|
@ -0,0 +1,302 @@
|
|||
variable "image_tag" {
|
||||
type = string
|
||||
default = "latest"
|
||||
description = "recruiter-responder image tag. Use 8-char git SHA in CI."
|
||||
}
|
||||
|
||||
variable "postgresql_host" { type = string }
|
||||
|
||||
locals {
|
||||
namespace = "recruiter-responder"
|
||||
image = "forgejo.viktorbarzin.me/viktor/recruiter-responder:${var.image_tag}"
|
||||
labels = {
|
||||
app = "recruiter-responder"
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_namespace" "recruiter_responder" {
|
||||
metadata {
|
||||
name = local.namespace
|
||||
labels = {
|
||||
tier = local.tiers.aux
|
||||
"istio-injection" = "disabled"
|
||||
}
|
||||
}
|
||||
lifecycle {
|
||||
# KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label.
|
||||
ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
|
||||
}
|
||||
}
|
||||
|
||||
# App secrets — seed these in Vault before applying:
|
||||
# secret/recruiter-responder
|
||||
# webhook_bearer_token — bearer for all /api/* endpoints (consumed by
|
||||
# the OpenClaw recruiter-api plugin)
|
||||
# imap_me_user — IMAP for me@viktorbarzin.me (full address)
|
||||
# imap_me_pass — IMAP password for me@
|
||||
# imap_spam_user — IMAP for spam@viktorbarzin.me
|
||||
# imap_spam_pass — IMAP password for spam@
|
||||
# smtp_password — SMTP password for me@viktorbarzin.me
|
||||
# claude_agent_token — Bearer for claude-agent-service (Tier-2)
|
||||
# task_webhook_token — Bearer for OpenClaw task-webhook (optional;
|
||||
# empty allowed if task-webhook is unauthed)
|
||||
#
|
||||
# Schema in CNPG: `recruiter_responder` (alembic creates on first migrate).
|
||||
# DB user: role pre-created in dbaas; password issued by the Vault database engine — see static-creds/pg-recruiter-responder.
|
||||
resource "kubernetes_manifest" "external_secret" {
|
||||
manifest = {
|
||||
apiVersion = "external-secrets.io/v1beta1"
|
||||
kind = "ExternalSecret"
|
||||
metadata = {
|
||||
name = "recruiter-responder-secrets"
|
||||
namespace = local.namespace
|
||||
}
|
||||
spec = {
|
||||
refreshInterval = "15m"
|
||||
secretStoreRef = {
|
||||
name = "vault-kv"
|
||||
kind = "ClusterSecretStore"
|
||||
}
|
||||
target = {
|
||||
name = "recruiter-responder-secrets"
|
||||
template = {
|
||||
metadata = {
|
||||
annotations = {
|
||||
"reloader.stakater.com/match" = "true"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
data = [
|
||||
{ secretKey = "WEBHOOK_BEARER_TOKEN", remoteRef = { key = "recruiter-responder", property = "webhook_bearer_token" } },
|
||||
{ secretKey = "IMAP_ME_USER", remoteRef = { key = "recruiter-responder", property = "imap_me_user" } },
|
||||
{ secretKey = "IMAP_ME_PASS", remoteRef = { key = "recruiter-responder", property = "imap_me_pass" } },
|
||||
{ secretKey = "IMAP_SPAM_USER", remoteRef = { key = "recruiter-responder", property = "imap_spam_user" } },
|
||||
{ secretKey = "IMAP_SPAM_PASS", remoteRef = { key = "recruiter-responder", property = "imap_spam_pass" } },
|
||||
{ secretKey = "SMTP_PASSWORD", remoteRef = { key = "recruiter-responder", property = "smtp_password" } },
|
||||
{ secretKey = "CLAUDE_AGENT_TOKEN", remoteRef = { key = "recruiter-responder", property = "claude_agent_token" } },
|
||||
{ secretKey = "TASK_WEBHOOK_TOKEN", remoteRef = { key = "recruiter-responder", property = "task_webhook_token" } },
|
||||
]
|
||||
}
|
||||
}
|
||||
depends_on = [kubernetes_namespace.recruiter_responder]
|
||||
}
|
||||
|
||||
# DB credentials from Vault database engine (7-day rotation).
|
||||
# Builds the asyncpg DSN consumed by the FastAPI app as DB_CONNECTION_STRING.
|
||||
# Pre-req in dbaas: CNPG cluster has DB `recruiter_responder`, role
|
||||
# `recruiter_responder`, and Vault role `static-creds/pg-recruiter-responder`.
|
||||
resource "kubernetes_manifest" "db_external_secret" {
|
||||
manifest = {
|
||||
apiVersion = "external-secrets.io/v1beta1"
|
||||
kind = "ExternalSecret"
|
||||
metadata = {
|
||||
name = "recruiter-responder-db-creds"
|
||||
namespace = local.namespace
|
||||
}
|
||||
spec = {
|
||||
refreshInterval = "15m"
|
||||
secretStoreRef = {
|
||||
name = "vault-database"
|
||||
kind = "ClusterSecretStore"
|
||||
}
|
||||
target = {
|
||||
name = "recruiter-responder-db-creds"
|
||||
template = {
|
||||
metadata = {
|
||||
annotations = {
|
||||
"reloader.stakater.com/match" = "true"
|
||||
}
|
||||
}
|
||||
data = {
|
||||
DB_CONNECTION_STRING = "postgresql+asyncpg://recruiter_responder:{{ .password }}@${var.postgresql_host}:5432/recruiter_responder"
|
||||
DB_PASSWORD = "{{ .password }}"
|
||||
}
|
||||
}
|
||||
}
|
||||
data = [{
|
||||
secretKey = "password"
|
||||
remoteRef = {
|
||||
key = "static-creds/pg-recruiter-responder"
|
||||
property = "password"
|
||||
}
|
||||
}]
|
||||
}
|
||||
}
|
||||
depends_on = [kubernetes_namespace.recruiter_responder]
|
||||
}
|
||||
|
||||
resource "kubernetes_deployment" "recruiter_responder" {
|
||||
metadata {
|
||||
name = "recruiter-responder"
|
||||
namespace = kubernetes_namespace.recruiter_responder.metadata[0].name
|
||||
labels = merge(local.labels, {
|
||||
tier = local.tiers.aux
|
||||
})
|
||||
annotations = {
|
||||
"reloader.stakater.com/search" = "true"
|
||||
}
|
||||
}
|
||||
|
||||
spec {
|
||||
# IMAP IDLE + APScheduler want a single leader; concurrency hurts both.
|
||||
replicas = 1
|
||||
strategy {
|
||||
type = "Recreate"
|
||||
}
|
||||
|
||||
selector {
|
||||
match_labels = local.labels
|
||||
}
|
||||
|
||||
template {
|
||||
metadata {
|
||||
labels = local.labels
|
||||
}
|
||||
|
||||
spec {
|
||||
image_pull_secrets {
|
||||
name = "registry-credentials"
|
||||
}
|
||||
|
||||
init_container {
|
||||
name = "alembic-migrate"
|
||||
image = local.image
|
||||
command = ["alembic", "upgrade", "head"]
|
||||
|
||||
env_from {
|
||||
secret_ref { name = "recruiter-responder-secrets" }
|
||||
}
|
||||
env_from {
|
||||
secret_ref { name = "recruiter-responder-db-creds" }
|
||||
}
|
||||
|
||||
resources {
|
||||
requests = { cpu = "50m", memory = "256Mi" }
|
||||
limits = { memory = "512Mi" }
|
||||
}
|
||||
}
|
||||
|
||||
container {
|
||||
name = "recruiter-responder"
|
||||
image = local.image
|
||||
|
||||
port {
|
||||
container_port = 8080
|
||||
}
|
||||
|
||||
env_from {
|
||||
secret_ref { name = "recruiter-responder-secrets" }
|
||||
}
|
||||
env_from {
|
||||
secret_ref { name = "recruiter-responder-db-creds" }
|
||||
}
|
||||
|
||||
# IMAP fan-in: read both mailboxes off the in-cluster mailserver.
|
||||
env {
|
||||
name = "IMAP_MAILBOXES"
|
||||
value = "me,spam"
|
||||
}
|
||||
env {
|
||||
name = "IMAP_ME_HOST"
|
||||
value = "mailserver.mailserver.svc.cluster.local"
|
||||
}
|
||||
env {
|
||||
name = "IMAP_SPAM_HOST"
|
||||
value = "mailserver.mailserver.svc.cluster.local"
|
||||
}
|
||||
# SMTP (outbound reply) — same mailserver service, STARTTLS on 587.
|
||||
env {
|
||||
name = "SMTP_HOST"
|
||||
value = "mailserver.mailserver.svc.cluster.local"
|
||||
}
|
||||
env {
|
||||
name = "SMTP_PORT"
|
||||
value = "587"
|
||||
}
|
||||
env {
|
||||
name = "SMTP_USER"
|
||||
value = "me@viktorbarzin.me"
|
||||
}
|
||||
env {
|
||||
name = "SMTP_FROM_ADDR"
|
||||
value = "me@viktorbarzin.me"
|
||||
}
|
||||
env {
|
||||
name = "SMTP_FROM_NAME"
|
||||
value = "Viktor Barzin"
|
||||
}
|
||||
# Tier-0 LLM
|
||||
env {
|
||||
name = "LLAMA_SWAP_URL"
|
||||
value = "http://llama-swap.llama-cpp.svc.cluster.local:8080"
|
||||
}
|
||||
env {
|
||||
name = "LLAMA_SWAP_MODEL"
|
||||
value = "qwen3-8b"
|
||||
}
|
||||
# Tier-2 LLM (deep_research only)
|
||||
env {
|
||||
name = "CLAUDE_AGENT_URL"
|
||||
value = "http://claude-agent-service.claude-agent.svc.cluster.local:8080"
|
||||
}
|
||||
# OpenClaw proactive push
|
||||
env {
|
||||
name = "TASK_WEBHOOK_URL"
|
||||
value = "http://task-webhook.openclaw.svc.cluster.local"
|
||||
}
|
||||
|
||||
readiness_probe {
|
||||
http_get {
|
||||
path = "/healthz"
|
||||
port = 8080
|
||||
}
|
||||
initial_delay_seconds = 5
|
||||
period_seconds = 10
|
||||
}
|
||||
liveness_probe {
|
||||
http_get {
|
||||
path = "/healthz"
|
||||
port = 8080
|
||||
}
|
||||
initial_delay_seconds = 30
|
||||
period_seconds = 30
|
||||
}
|
||||
|
||||
resources {
|
||||
requests = { cpu = "100m", memory = "384Mi" }
|
||||
limits = { memory = "768Mi" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
|
||||
}
|
||||
|
||||
depends_on = [
|
||||
kubernetes_manifest.external_secret,
|
||||
kubernetes_manifest.db_external_secret,
|
||||
]
|
||||
}
|
||||
|
||||
resource "kubernetes_service" "recruiter_responder" {
|
||||
metadata {
|
||||
name = "recruiter-responder"
|
||||
namespace = kubernetes_namespace.recruiter_responder.metadata[0].name
|
||||
labels = local.labels
|
||||
}
|
||||
|
||||
spec {
|
||||
type = "ClusterIP"
|
||||
selector = local.labels
|
||||
|
||||
port {
|
||||
name = "http"
|
||||
port = 8080
|
||||
target_port = 8080
|
||||
}
|
||||
}
|
||||
}
|
||||
24
stacks/recruiter-responder/terragrunt.hcl
Normal file
24
stacks/recruiter-responder/terragrunt.hcl
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
include "root" {
|
||||
path = find_in_parent_folders()
|
||||
}
|
||||
|
||||
dependency "platform" {
|
||||
config_path = "../platform"
|
||||
skip_outputs = true
|
||||
}
|
||||
|
||||
dependency "vault" {
|
||||
config_path = "../vault"
|
||||
skip_outputs = true
|
||||
}
|
||||
|
||||
dependency "external-secrets" {
|
||||
config_path = "../external-secrets"
|
||||
skip_outputs = true
|
||||
}
|
||||
|
||||
inputs = {
|
||||
# Override per-deploy in CI / commit. Initial build will land on forgejo
|
||||
# as `forgejo.viktorbarzin.me/viktor/recruiter-responder:<8-char-sha>`.
|
||||
image_tag = "latest"
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue