diff --git a/stacks/llama-cpp/main.tf b/stacks/llama-cpp/main.tf index 289f058f..27c952d6 100644 --- a/stacks/llama-cpp/main.tf +++ b/stacks/llama-cpp/main.tf @@ -17,6 +17,8 @@ locals { # snapshot_download with allow_patterns). Stable symlinks model.gguf / # mmproj.gguf are created after download so llama-swap config can be # filename-agnostic. + # `text_only = true` skips mmproj download + --mmproj flag (text-only LLM). + # Vision models set `text_only = false`. Only the download script defaults it to false; the cmd list reads `cfg.text_only` directly, so set it on every entry. models = { qwen3vl-8b = { hf_repo = "Qwen/Qwen3-VL-8B-Instruct-GGUF" @@ -24,6 +26,7 @@ locals { mmproj_pattern = "*mmproj*.gguf" ctx_size = 3072 gpu_layers = 99 + text_only = false } minicpm-v-4-5 = { hf_repo = "openbmb/MiniCPM-V-4_5-gguf" @@ -31,6 +34,7 @@ locals { mmproj_pattern = "*mmproj*.gguf" ctx_size = 3072 gpu_layers = 99 + text_only = false } qwen3vl-4b = { hf_repo = "Qwen/Qwen3-VL-4B-Instruct-GGUF" @@ -38,6 +42,21 @@ locals { mmproj_pattern = "*mmproj*.gguf" ctx_size = 3072 gpu_layers = 99 + text_only = false + } + # Text-only triage / drafting model for recruiter-responder. + # Q4_K_M, ~4.7GB, 32k native context (capped at 16k here — plenty + # for recruiter emails + extraction prompt + JSON output). + # Unsloth's GGUF: well-maintained, includes Q4_K_M. Qwen3 is a + # thinking-capable model; recruiter-responder disables thinking via + # `enable_thinking=false` in the chat-template kwargs. + qwen3-8b = { + hf_repo = "unsloth/Qwen3-8B-GGUF" + gguf_pattern = "*Q4_K_M*.gguf" + mmproj_pattern = "" + ctx_size = 16384 + gpu_layers = 99 + text_only = true } } @@ -55,18 +74,20 @@ locals { models = { for mid, cfg in local.models : mid => { - cmd = join(" ", [ + cmd = join(" ", concat([ "/app/llama-server", "--host 0.0.0.0", "--port $${PORT}", "-m /models/${mid}/model.gguf", + ], cfg.text_only ? 
[] : [ "--mmproj /models/${mid}/mmproj.gguf", + ], [ "-ngl ${cfg.gpu_layers}", "-c ${cfg.ctx_size}", "-np 1", "--jinja", "-fa on", - ]) + ])) ttl = 600 # unload after 10 min idle checkEndpoint = "/health" } @@ -133,11 +154,15 @@ resource "kubernetes_job_v1" "download_models" { for mid, cfg in models.items(): local_dir = f"/models/{mid}" os.makedirs(local_dir, exist_ok=True) - print(f"==> downloading {mid} from {cfg['hf_repo']} -> {local_dir}", flush=True) + text_only = cfg.get("text_only", False) + patterns = [cfg["gguf_pattern"]] + if not text_only and cfg.get("mmproj_pattern"): + patterns.append(cfg["mmproj_pattern"]) + print(f"==> downloading {mid} from {cfg['hf_repo']} -> {local_dir} (text_only={text_only})", flush=True) snapshot_download( repo_id=cfg["hf_repo"], local_dir=local_dir, - allow_patterns=[cfg["gguf_pattern"], cfg["mmproj_pattern"]], + allow_patterns=patterns, token=os.environ.get("HF_TOKEN") or None, # Single-threaded download — multi-worker buffers # multi-GB chunks per worker and OOMs the Job at 2Gi. @@ -146,17 +171,20 @@ resource "kubernetes_job_v1" "download_models" { # Resolve actual filenames and create stable symlinks so # llama-swap config is filename-agnostic. 
ggufs = [p for p in glob.glob(f"{local_dir}/*Q4_K_M*.gguf") if "mmproj" not in p.lower()] - mmprojs = glob.glob(f"{local_dir}/*mmproj*.gguf") if not ggufs: raise SystemExit(f"no GGUF found in {local_dir}") - if not mmprojs: - raise SystemExit(f"no mmproj found in {local_dir}") gguf_link = f"{local_dir}/model.gguf" - mmproj_link = f"{local_dir}/mmproj.gguf" - for link, target in ((gguf_link, ggufs[0]), (mmproj_link, mmprojs[0])): - if os.path.islink(link) or os.path.exists(link): - os.unlink(link) - os.symlink(os.path.basename(target), link) + if os.path.islink(gguf_link) or os.path.exists(gguf_link): + os.unlink(gguf_link) + os.symlink(os.path.basename(ggufs[0]), gguf_link) + if not text_only: + mmprojs = glob.glob(f"{local_dir}/*mmproj*.gguf") + if not mmprojs: + raise SystemExit(f"no mmproj found in {local_dir}") + mmproj_link = f"{local_dir}/mmproj.gguf" + if os.path.islink(mmproj_link) or os.path.exists(mmproj_link): + os.unlink(mmproj_link) + os.symlink(os.path.basename(mmprojs[0]), mmproj_link) print(f"==> done {mid}", flush=True) for f in sorted(os.listdir(local_dir)): full = os.path.join(local_dir, f) diff --git a/stacks/openclaw/main.tf b/stacks/openclaw/main.tf index 34442301..7bccd0d1 100644 --- a/stacks/openclaw/main.tf +++ b/stacks/openclaw/main.tf @@ -456,6 +456,32 @@ resource "kubernetes_deployment" "openclaw" { # Dotfiles already exist on NFS at /home/node/.openclaw/dotfiles from # a previous clone. To update, run git pull manually or via CronJob. + # Init 3: install the recruiter-api OpenClaw plugin from the + # recruiter-responder image into NFS extensions/. Plugin lifecycle + # is coupled to the recruiter-responder image tag — bumping that + # tag re-installs the plugin on next openclaw pod restart. NOTE(review): the image here is hard-coded to :latest, not the recruiter-responder stack's image_tag — confirm the coupling still holds once CI deploys SHA tags. 
+ init_container { + name = "install-recruiter-plugin" + image = "forgejo.viktorbarzin.me/viktor/recruiter-responder:latest" + command = ["sh", "-c", <<-EOT + set -eu + mkdir -p /home/node/.openclaw/extensions/recruiter-api + cp -r /app/openclaw-plugin/. /home/node/.openclaw/extensions/recruiter-api/ + chown -R 1000:1000 /home/node/.openclaw/extensions/recruiter-api + echo "recruiter-api plugin installed at /home/node/.openclaw/extensions/recruiter-api" + ls -la /home/node/.openclaw/extensions/recruiter-api + EOT + ] + volume_mount { + name = "openclaw-home" + mount_path = "/home/node/.openclaw" + } + resources { + requests = { cpu = "50m", memory = "64Mi" } + limits = { memory = "128Mi" } + } + } + # Main container: OpenClaw container { name = "openclaw" @@ -533,6 +559,23 @@ resource "kubernetes_deployment" "openclaw" { } } } + # Recruiter Responder API — consumed by the recruiter-api plugin + # (copied into /home/node/.openclaw/extensions/recruiter-api/ by + # the install-recruiter-plugin init container above). + env { + name = "RECRUITER_RESPONDER_URL" + value = "http://recruiter-responder.recruiter-responder.svc.cluster.local:8080" + } + env { + name = "RECRUITER_RESPONDER_TOKEN" + value_from { + secret_key_ref { + name = "openclaw-secrets" + key = "recruiter_responder_bearer_token" + optional = true + } + } + } # Python packages path for skills env { name = "PYTHONPATH" diff --git a/stacks/recruiter-responder/main.tf b/stacks/recruiter-responder/main.tf new file mode 100644 index 00000000..f8685567 --- /dev/null +++ b/stacks/recruiter-responder/main.tf @@ -0,0 +1,302 @@ +variable "image_tag" { + type = string + default = "latest" + description = "recruiter-responder image tag. Use 8-char git SHA in CI." 
+} + +variable "postgresql_host" { type = string } + +locals { + namespace = "recruiter-responder" + image = "forgejo.viktorbarzin.me/viktor/recruiter-responder:${var.image_tag}" + labels = { + app = "recruiter-responder" + } +} + +resource "kubernetes_namespace" "recruiter_responder" { + metadata { + name = local.namespace + labels = { + tier = local.tiers.aux + "istio-injection" = "disabled" + } + } + lifecycle { + # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label. + ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]] + } +} + +# App secrets — seed these in Vault before applying: +# secret/recruiter-responder +# webhook_bearer_token — bearer for all /api/* endpoints (consumed by +# the OpenClaw recruiter-api plugin) +# imap_me_user — IMAP for me@viktorbarzin.me (full address) +# imap_me_pass — IMAP password for me@ +# imap_spam_user — IMAP for spam@viktorbarzin.me +# imap_spam_pass — IMAP password for spam@ +# smtp_password — SMTP password for me@viktorbarzin.me +# claude_agent_token — Bearer for claude-agent-service (Tier-2) +# task_webhook_token — Bearer for OpenClaw task-webhook (optional; +# empty allowed if task-webhook is unauthed) +# +# Schema in CNPG: `recruiter_responder` (alembic creates on first migrate). +# DB user: created via Vault database engine — see static-creds/pg-recruiter-responder. 
+resource "kubernetes_manifest" "external_secret" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { + name = "recruiter-responder-secrets" + namespace = local.namespace + } + spec = { + refreshInterval = "15m" + secretStoreRef = { + name = "vault-kv" + kind = "ClusterSecretStore" + } + target = { + name = "recruiter-responder-secrets" + template = { + metadata = { + annotations = { + "reloader.stakater.com/match" = "true" + } + } + } + } + data = [ + { secretKey = "WEBHOOK_BEARER_TOKEN", remoteRef = { key = "recruiter-responder", property = "webhook_bearer_token" } }, + { secretKey = "IMAP_ME_USER", remoteRef = { key = "recruiter-responder", property = "imap_me_user" } }, + { secretKey = "IMAP_ME_PASS", remoteRef = { key = "recruiter-responder", property = "imap_me_pass" } }, + { secretKey = "IMAP_SPAM_USER", remoteRef = { key = "recruiter-responder", property = "imap_spam_user" } }, + { secretKey = "IMAP_SPAM_PASS", remoteRef = { key = "recruiter-responder", property = "imap_spam_pass" } }, + { secretKey = "SMTP_PASSWORD", remoteRef = { key = "recruiter-responder", property = "smtp_password" } }, + { secretKey = "CLAUDE_AGENT_TOKEN", remoteRef = { key = "recruiter-responder", property = "claude_agent_token" } }, + { secretKey = "TASK_WEBHOOK_TOKEN", remoteRef = { key = "recruiter-responder", property = "task_webhook_token" } }, + ] + } + } + depends_on = [kubernetes_namespace.recruiter_responder] +} + +# DB credentials from Vault database engine (7-day rotation). +# Builds the asyncpg DSN consumed by the FastAPI app as DB_CONNECTION_STRING. NOTE(review): the password is templated into the URL unescaped — confirm rotated passwords stay URL-safe. +# Pre-req in dbaas: CNPG cluster has DB `recruiter_responder`, role +# `recruiter_responder`, and Vault role `static-creds/pg-recruiter-responder`. 
+resource "kubernetes_manifest" "db_external_secret" { + manifest = { + apiVersion = "external-secrets.io/v1beta1" + kind = "ExternalSecret" + metadata = { + name = "recruiter-responder-db-creds" + namespace = local.namespace + } + spec = { + refreshInterval = "15m" + secretStoreRef = { + name = "vault-database" + kind = "ClusterSecretStore" + } + target = { + name = "recruiter-responder-db-creds" + template = { + metadata = { + annotations = { + "reloader.stakater.com/match" = "true" + } + } + data = { + DB_CONNECTION_STRING = "postgresql+asyncpg://recruiter_responder:{{ .password }}@${var.postgresql_host}:5432/recruiter_responder" + DB_PASSWORD = "{{ .password }}" + } + } + } + data = [{ + secretKey = "password" + remoteRef = { + key = "static-creds/pg-recruiter-responder" + property = "password" + } + }] + } + } + depends_on = [kubernetes_namespace.recruiter_responder] +} + +resource "kubernetes_deployment" "recruiter_responder" { + metadata { + name = "recruiter-responder" + namespace = kubernetes_namespace.recruiter_responder.metadata[0].name + labels = merge(local.labels, { + tier = local.tiers.aux + }) + annotations = { + "reloader.stakater.com/search" = "true" + } + } + + spec { + # IMAP IDLE + APScheduler want a single leader; concurrency hurts both. Recreate stops the old pod before starting the new one, so rollouts never overlap. 
+ replicas = 1 + strategy { + type = "Recreate" + } + + selector { + match_labels = local.labels + } + + template { + metadata { + labels = local.labels + } + + spec { + image_pull_secrets { + name = "registry-credentials" + } + + init_container { + name = "alembic-migrate" + image = local.image + command = ["alembic", "upgrade", "head"] + + env_from { + secret_ref { name = "recruiter-responder-secrets" } + } + env_from { + secret_ref { name = "recruiter-responder-db-creds" } + } + + resources { + requests = { cpu = "50m", memory = "256Mi" } + limits = { memory = "512Mi" } + } + } + + container { + name = "recruiter-responder" + image = local.image + + port { + container_port = 8080 + } + + env_from { + secret_ref { name = "recruiter-responder-secrets" } + } + env_from { + secret_ref { name = "recruiter-responder-db-creds" } + } + + # IMAP fan-in: read both mailboxes off the in-cluster mailserver. + env { + name = "IMAP_MAILBOXES" + value = "me,spam" + } + env { + name = "IMAP_ME_HOST" + value = "mailserver.mailserver.svc.cluster.local" + } + env { + name = "IMAP_SPAM_HOST" + value = "mailserver.mailserver.svc.cluster.local" + } + # SMTP (outbound reply) — same mailserver service, STARTTLS on 587. 
+ env { + name = "SMTP_HOST" + value = "mailserver.mailserver.svc.cluster.local" + } + env { + name = "SMTP_PORT" + value = "587" + } + env { + name = "SMTP_USER" + value = "me@viktorbarzin.me" + } + env { + name = "SMTP_FROM_ADDR" + value = "me@viktorbarzin.me" + } + env { + name = "SMTP_FROM_NAME" + value = "Viktor Barzin" + } + # Tier-0 LLM + env { + name = "LLAMA_SWAP_URL" + value = "http://llama-swap.llama-cpp.svc.cluster.local:8080" + } + env { + name = "LLAMA_SWAP_MODEL" + value = "qwen3-8b" + } + # Tier-2 LLM (deep_research only) + env { + name = "CLAUDE_AGENT_URL" + value = "http://claude-agent-service.claude-agent.svc.cluster.local:8080" + } + # OpenClaw proactive push + env { + name = "TASK_WEBHOOK_URL" + value = "http://task-webhook.openclaw.svc.cluster.local" + } + + readiness_probe { + http_get { + path = "/healthz" + port = 8080 + } + initial_delay_seconds = 5 + period_seconds = 10 + } + liveness_probe { + http_get { + path = "/healthz" + port = 8080 + } + initial_delay_seconds = 30 + period_seconds = 30 + } + + resources { + requests = { cpu = "100m", memory = "384Mi" } + limits = { memory = "768Mi" } + } + } + } + } + } + + lifecycle { + ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1 + } + + depends_on = [ + kubernetes_manifest.external_secret, + kubernetes_manifest.db_external_secret, + ] +} + +resource "kubernetes_service" "recruiter_responder" { + metadata { + name = "recruiter-responder" + namespace = kubernetes_namespace.recruiter_responder.metadata[0].name + labels = local.labels + } + + spec { + type = "ClusterIP" + selector = local.labels + + port { + name = "http" + port = 8080 + target_port = 8080 + } + } +} diff --git a/stacks/recruiter-responder/terragrunt.hcl b/stacks/recruiter-responder/terragrunt.hcl new file mode 100644 index 00000000..08c1ee48 --- /dev/null +++ b/stacks/recruiter-responder/terragrunt.hcl @@ -0,0 +1,24 @@ +include "root" { + path = find_in_parent_folders() +} + +dependency 
"platform" { + config_path = "../platform" + skip_outputs = true +} + +dependency "vault" { + config_path = "../vault" + skip_outputs = true +} + +dependency "external-secrets" { + config_path = "../external-secrets" + skip_outputs = true +} + +inputs = { + # Override per-deploy in CI / commit. Initial build will land on forgejo + # as `forgejo.viktorbarzin.me/viktor/recruiter-responder:<8-char-sha>`. + image_tag = "latest" +}