paperless-ai: deploy clusterzx/paperless-ai for semantic doc search + AI tagging

Viktor wanted real semantic search over his ~300 Paperless documents and preferred a ready-made solution over building one. paperless-ai provides local-embedding RAG (ChromaDB + sentence-transformers, GPU-free) plus LLM-driven auto-analysis/tagging. Wiring: - LLM (chat answers + tagging) -> in-cluster llama-swap qwen3-8b (OpenAI-compatible); embeddings + vector store are local on the PVC. - Reads Paperless over the internal service via a dedicated `paperless-ai` superuser token (Vault secret/paperless-ai); app-admin creds also in Vault. - Encrypted PVC for /app/data (SQLite + ChromaDB + model cache). - Ingress paperless-ai.viktorbarzin.me behind Authentik (auth=required). - Third-party image pinned (docker.io/clusterzx/paperless-ai:3.0.9), no Keel. Runtime config persists to the PVC .env via the app's one-time setup; the deployment env vars are pre-fill/documentation only. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 06:23:00 +00:00 · 2026-06-18 06:23:00 +00:00 · aeee0d02e2
commit aeee0d02e2
parent 605cf99a1b
3 changed files with 385 additions and 0 deletions
--- a/stacks/paperless-ai/main.tf
+++ b/stacks/paperless-ai/main.tf
@ -0,0 +1,358 @@
 variable "tls_secret_name" {
  type      = string
  sensitive = true
 }
 locals {
  namespace = "paperless-ai"
 }
 resource "kubernetes_namespace" "paperless_ai" {
  metadata {
    name = local.namespace
    labels = {
      tier = local.tiers.aux
    }
  }
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
  }
 }
 # paperless-ai secrets pulled from Vault (secret/paperless-ai) by ESO:
 #   paperless_api_token — token for the dedicated `paperless-ai` Paperless
 #                         superuser (reads + tags ALL documents).
 #   api_key             — M2M key between the Node UI and the Python RAG service.
 #   custom_api_key      — placeholder bearer for llama-swap (no auth, field required).
 resource "kubernetes_manifest" "external_secret" {
  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"
    metadata = {
      name      = "paperless-ai-secrets"
      namespace = local.namespace
    }
    spec = {
      refreshInterval = "15m"
      secretStoreRef = {
        name = "vault-kv"
        kind = "ClusterSecretStore"
      }
      target = {
        name = "paperless-ai-secrets"
      }
      dataFrom = [{
        extract = {
          key = "paperless-ai"
        }
      }]
    }
  }
  depends_on = [kubernetes_namespace.paperless_ai]
 }
 module "tls_secret" {
  source          = "../../modules/kubernetes/setup_tls_secret"
  namespace       = kubernetes_namespace.paperless_ai.metadata[0].name
  tls_secret_name = var.tls_secret_name
 }
 # /app/data holds the SQLite DB, the embedded ChromaDB vector store
 # (rag_data/), the cached local embedding model, thumbnails and the
 # persisted .env. Sensitive (document-derived vectors + the Paperless
 # token) -> encrypted block storage. Autoresizes 2Gi -> 10Gi.
 resource "kubernetes_persistent_volume_claim" "data_encrypted" {
  wait_until_bound = false
  metadata {
    name      = "paperless-ai-data-encrypted"
    namespace = local.namespace
    annotations = {
      "resize.topolvm.io/threshold"     = "10%"
      "resize.topolvm.io/increase"      = "100%"
      "resize.topolvm.io/storage_limit" = "10Gi"
    }
  }
  spec {
    access_modes       = ["ReadWriteOnce"]
    storage_class_name = "proxmox-lvm-encrypted"
    resources {
      requests = {
        storage = "2Gi"
      }
    }
  }
  lifecycle {
    # pvc-autoresizer grows requests.storage up to storage_limit; PVCs
    # cannot shrink, so ignore drift to keep applies idempotent.
    ignore_changes = [spec[0].resources[0].requests]
  }
 }
 resource "kubernetes_deployment" "paperless_ai" {
  metadata {
    name      = "paperless-ai"
    namespace = local.namespace
    labels = {
      app  = "paperless-ai"
      tier = local.tiers.aux
    }
    annotations = {
      "reloader.stakater.com/auto" = "true"
    }
  }
  # The image bundles PyTorch + Surya OCR (multi-GB); the first pull can
  # exceed the provider's rollout-wait. Don't block apply on readiness —
  # rollout is verified out-of-band with kubectl.
  wait_for_rollout = false
  spec {
    replicas = 1
    # RWO encrypted PVC -> never run two pods against it at once.
    strategy {
      type = "Recreate"
    }
    selector {
      match_labels = {
        app = "paperless-ai"
      }
    }
    template {
      metadata {
        labels = {
          app = "paperless-ai"
        }
      }
      spec {
        # The image runs as PUID/PGID 1000; fsGroup makes the encrypted
        # PVC group-writable so the app can persist to /app/data.
        security_context {
          fs_group = 1000
        }
        container {
          name  = "paperless-ai"
          image = "docker.io/clusterzx/paperless-ai:3.0.9"
          # Node UI (proxied by the Service) + Python RAG service (in-pod only).
          port {
            container_port = 3000
            name           = "http"
          }
          port {
            container_port = 8000
            name           = "rag"
          }
          # NOTE on configuration model: paperless-ai persists its RUNTIME
          # config (Paperless URL/token, AI provider, processing flags) plus
          # the app-admin account to /app/data/.env + SQLite on the PVC,
          # written once via its setup flow (POST /setup). The env vars below
          # are consumed by the Node layer and serve as setup-form pre-fill;
          # the authoritative runtime config is the PVC's .env. App-admin
          # creds + the Paperless token live in Vault secret/paperless-ai.
          env {
            name  = "PUID"
            value = "1000"
          }
          env {
            name  = "PGID"
            value = "1000"
          }
          env {
            name  = "PAPERLESS_AI_PORT"
            value = "3000"
          }
          env {
            name  = "RAG_SERVICE_URL"
            value = "http://localhost:8000"
          }
          env {
            name  = "RAG_SERVICE_ENABLED"
            value = "true"
          }
          # Persist the HuggingFace / sentence-transformers embedding model
          # (paraphrase-multilingual-MiniLM-L12-v2) onto the PVC so it is
          # not re-downloaded on every pod restart.
          env {
            name  = "HF_HOME"
            value = "/app/data/hf-cache"
          }
          env {
            name  = "SENTENCE_TRANSFORMERS_HOME"
            value = "/app/data/st-cache"
          }
          # --- Paperless-ngx connection (internal service, no edge hop) ---
          env {
            name  = "PAPERLESS_API_URL"
            value = "http://paperless-ngx.paperless-ngx.svc.cluster.local/api"
          }
          env {
            name  = "PAPERLESS_USERNAME"
            value = "paperless-ai"
          }
          env {
            name = "PAPERLESS_API_TOKEN"
            value_from {
              secret_key_ref {
                name = "paperless-ai-secrets"
                key  = "paperless_api_token"
              }
            }
          }
          # --- LLM backend: in-cluster llama-swap (OpenAI-compatible) ---
          env {
            name  = "AI_PROVIDER"
            value = "custom"
          }
          env {
            name  = "CUSTOM_BASE_URL"
            value = "http://llama-swap.llama-cpp.svc.cluster.local:8080/v1"
          }
          env {
            name  = "CUSTOM_MODEL"
            value = "qwen3-8b"
          }
          env {
            name = "CUSTOM_API_KEY"
            value_from {
              secret_key_ref {
                name = "paperless-ai-secrets"
                key  = "custom_api_key"
              }
            }
          }
          # M2M key between the Node UI and the Python RAG service.
          env {
            name = "API_KEY"
            value_from {
              secret_key_ref {
                name = "paperless-ai-secrets"
                key  = "api_key"
              }
            }
          }
          # --- Processing: auto-analyze + tag every document ---
          env {
            name  = "SCAN_INTERVAL"
            value = "*/30 * * * *"
          }
          env {
            name  = "PROCESS_PREDEFINED_DOCUMENTS"
            value = "yes"
          }
          env {
            name  = "ADD_AI_PROCESSED_TAG"
            value = "yes"
          }
          env {
            name  = "AI_PROCESSED_TAG_NAME"
            value = "ai-processed"
          }
          volume_mount {
            name       = "data"
            mount_path = "/app/data"
          }
          resources {
            requests = {
              cpu    = "200m"
              memory = "2Gi"
            }
            limits = {
              # torch + the sentence-transformers model load in-process for
              # the RAG service; 4Gi covers Node + Python + ChromaDB.
              memory = "4Gi"
            }
          }
          # The image presents a setup wizard / login that 30x-redirects on
          # `/`, so an HTTP probe is brittle pre-setup. A TCP probe on the
          # Node port is the robust readiness signal (same approach as the
          # paperless-mcp stack).
          startup_probe {
            tcp_socket {
              port = 3000
            }
            failure_threshold = 60
            period_seconds    = 5
          }
          readiness_probe {
            tcp_socket {
              port = 3000
            }
            initial_delay_seconds = 10
            period_seconds        = 15
          }
          liveness_probe {
            tcp_socket {
              port = 3000
            }
            initial_delay_seconds = 60
            period_seconds        = 30
          }
        }
        volume {
          name = "data"
          persistent_volume_claim {
            claim_name = kubernetes_persistent_volume_claim.data_encrypted.metadata[0].name
          }
        }
      }
    }
  }
  lifecycle {
    ignore_changes = [
      spec[0].template[0].spec[0].dns_config, # KYVERNO_LIFECYCLE_V1
    ]
  }
 }
 resource "kubernetes_service" "paperless_ai" {
  metadata {
    name      = "paperless-ai"
    namespace = local.namespace
    labels = {
      app = "paperless-ai"
    }
  }
  spec {
    selector = {
      app = "paperless-ai"
    }
    port {
      name        = "http"
      port        = 80
      target_port = 3000
      protocol    = "TCP"
    }
  }
 }
 module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"
  # auth = "required": private admin UI. paperless-ai has its own login but
  # Authentik forward-auth is the primary gate (defence in depth). It only
  # polls Paperless outbound (no inbound API consumers), so the Authentik
  # 302 dance does not break it.
  auth            = "required"
  namespace       = kubernetes_namespace.paperless_ai.metadata[0].name
  name            = "paperless-ai"
  service_name    = "paperless-ai"
  host            = "paperless-ai"
  dns_type        = "proxied"
  tls_secret_name = var.tls_secret_name
  port            = 80
  extra_annotations = {
    "gethomepage.dev/enabled"     = "true"
    "gethomepage.dev/description" = "AI document search & tagging"
    "gethomepage.dev/group"       = "Productivity"
    "gethomepage.dev/icon"        = "paperless-ngx.png"
    "gethomepage.dev/name"        = "Paperless-AI"
    "gethomepage.dev/pod-selector" = ""
  }
 }
--- a/stacks/paperless-ai/secrets
+++ b/stacks/paperless-ai/secrets
@ -0,0 +1 @@
 ../../secrets
--- a/stacks/paperless-ai/terragrunt.hcl
+++ b/stacks/paperless-ai/terragrunt.hcl
@ -0,0 +1,26 @@
 include "root" {
  path = find_in_parent_folders()
 }
 dependency "platform" {
  config_path  = "../platform"
  skip_outputs = true
 }
 dependency "vault" {
  config_path  = "../vault"
  skip_outputs = true
 }
 # Reads the Paperless API over the in-cluster service.
 dependency "paperless-ngx" {
  config_path  = "../paperless-ngx"
  skip_outputs = true
 }
 # LLM (chat/answer generation + auto-tagging) is served by llama-swap's
 # OpenAI-compatible endpoint; embeddings/semantic search are local in-pod.
 dependency "llama-cpp" {
  config_path  = "../llama-cpp"
  skip_outputs = true
 }