From 0386f03f1a1e793a5331569106a04cbc968675fd Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sat, 18 Apr 2026 11:16:21 +0000 Subject: [PATCH] =?UTF-8?q?[ollama]=20Destroy=20stack=20=E2=80=94=20decomm?= =?UTF-8?q?issioned?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Context Stage 8 of ollama decommission. With the ollama-tcp Traefik entrypoint and IngressRouteTCP removed (stages 1-2), all downstream consumers re-routed or cleaned (stages 3-6), and the root tfvar dropped (stage 7), the ollama stack has no live consumers and can be destroyed. ## This change - `terragrunt destroy -auto-approve` on stacks/ollama. - Result: `Destroy complete! Resources: 18 destroyed.` - 1 namespace (ollama) - 2 deployments (ollama, ollama-ui) - 2 services (ollama, ollama-ui) - 3 ingresses (ollama, ollama-server, ollama-api) + 3 Cloudflare DNS records (proxied ollama, non-proxied A + AAAA for ollama-api) - 2 PVCs (ollama-data-host NFS, ollama-ui-data-proxmox — including the stuck Pending one from 47h ago; no finalizer trick needed) - 1 NFS PV (ollama-data-host) - 1 middleware (ollama_api_basic_auth_middleware) - 2 secrets (tls_secret, ollama_api_basic_auth) - 1 ExternalSecret manifest (external_secret) - Directory `stacks/ollama/` fully removed. - Verified `kubectl get ns ollama` → NotFound. ## Destroy blocker and fix The initial `tg destroy` failed because `variable "ollama_host"` in `stacks/ollama/main.tf` had no default and we had already removed it from `config.tfvars` in stage 7. Added `default = "ollama.ollama.svc.cluster.local"` to the variable, re-ran destroy successfully, then removed the whole directory as part of this commit (so the temporary default never ships). ## What is NOT in this change - Vault `secret/ollama` still present (stage 9 cleanup, pending an interactive Vault login). - NFS data at `/srv/nfs-ssd/ollama/` still present (stage 9 cleanup). 
- `/home/wizard/code/infra/secrets/nfs_directories.txt` still lists ollama (stage 9 — requires git-crypt unlock). ## Test plan ### Automated - `scripts/tg destroy -auto-approve` → "Destroy complete! Resources: 18 destroyed." - `kubectl get ns ollama` → "NotFound" (confirmed). ### Manual Verification 1. `kubectl get ns ollama` → NotFound. 2. `dig ollama.viktorbarzin.me @1.1.1.1` → Cloudflare record removed (propagation may take up to 5m). 3. `ls /home/wizard/code/infra/stacks/ollama/` → directory does not exist. Co-Authored-By: Claude Opus 4.7 (1M context) --- stacks/ollama/.terraform.lock.hcl | 73 ------ stacks/ollama/backend.tf | 7 - stacks/ollama/main.tf | 380 ------------------------------ stacks/ollama/providers.tf | 33 --- stacks/ollama/secrets | 1 - stacks/ollama/terragrunt.hcl | 13 - stacks/ollama/values.yaml | 28 --- 7 files changed, 535 deletions(-) delete mode 100644 stacks/ollama/.terraform.lock.hcl delete mode 100644 stacks/ollama/backend.tf delete mode 100644 stacks/ollama/main.tf delete mode 100644 stacks/ollama/providers.tf delete mode 120000 stacks/ollama/secrets delete mode 100644 stacks/ollama/terragrunt.hcl delete mode 100644 stacks/ollama/values.yaml diff --git a/stacks/ollama/.terraform.lock.hcl b/stacks/ollama/.terraform.lock.hcl deleted file mode 100644 index a1ca7484..00000000 --- a/stacks/ollama/.terraform.lock.hcl +++ /dev/null @@ -1,73 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. 
- -provider "registry.terraform.io/cloudflare/cloudflare" { - version = "4.52.7" - constraints = "~> 4.0" - hashes = [ - "h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=", - "zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b", - "zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e", - "zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10", - "zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285", - "zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529", - "zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f", - "zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13", - "zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d", - "zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f", - "zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d", - "zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe", - "zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455", - "zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2", - "zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b", - "zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe", - ] -} - -provider "registry.terraform.io/hashicorp/helm" { - version = "3.1.1" - hashes = [ - "h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=", - "h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=", - "zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275", - "zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a", - "zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29", - "zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104", - "zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990", - "zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34", - "zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8", - 
"zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1", - "zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b", - "zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903", - "zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4", - "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - ] -} - -provider "registry.terraform.io/hashicorp/kubernetes" { - version = "3.1.0" - hashes = [ - "h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=", - ] -} - -provider "registry.terraform.io/hashicorp/vault" { - version = "4.8.0" - constraints = "~> 4.0" - hashes = [ - "h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=", - "h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=", - "zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6", - "zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136", - "zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25", - "zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf", - "zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937", - "zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c", - "zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c", - "zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98", - "zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952", - "zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83", - ] -} diff --git a/stacks/ollama/backend.tf b/stacks/ollama/backend.tf deleted file mode 100644 index 3c61d7a6..00000000 --- a/stacks/ollama/backend.tf +++ /dev/null @@ -1,7 +0,0 @@ -# Generated by Terragrunt. 
Sig: nIlQXj57tbuaRZEa -terraform { - backend "pg" { - conn_str = "postgres://terraform_state:SBlzGxotNUN6HH9d0S-m@10.0.20.200:5432/terraform_state?sslmode=disable" - schema_name = "ollama" - } -} diff --git a/stacks/ollama/main.tf b/stacks/ollama/main.tf deleted file mode 100644 index 51aaf2b7..00000000 --- a/stacks/ollama/main.tf +++ /dev/null @@ -1,380 +0,0 @@ -variable "tls_secret_name" { - type = string - sensitive = true -} -variable "nfs_server" { type = string } -variable "ollama_host" { type = string } - -resource "kubernetes_manifest" "external_secret" { - manifest = { - apiVersion = "external-secrets.io/v1beta1" - kind = "ExternalSecret" - metadata = { - name = "ollama-secrets" - namespace = "ollama" - } - spec = { - refreshInterval = "15m" - secretStoreRef = { - name = "vault-kv" - kind = "ClusterSecretStore" - } - target = { - name = "ollama-secrets" - } - dataFrom = [{ - extract = { - key = "ollama" - } - }] - } - } - depends_on = [kubernetes_namespace.ollama] -} - -data "kubernetes_secret" "eso_secrets" { - metadata { - name = "ollama-secrets" - namespace = kubernetes_namespace.ollama.metadata[0].name - } - depends_on = [kubernetes_manifest.external_secret] -} - -locals { - api_credentials = jsondecode(data.kubernetes_secret.eso_secrets.data["api_credentials"]) -} - - -resource "kubernetes_namespace" "ollama" { - metadata { - name = "ollama" - labels = { - tier = local.tiers.gpu - } - } -} - -module "tls_secret" { - source = "../../modules/kubernetes/setup_tls_secret" - namespace = kubernetes_namespace.ollama.metadata[0].name - tls_secret_name = var.tls_secret_name -} - -module "nfs_ollama_data_host" { - source = "../../modules/kubernetes/nfs_volume" - name = "ollama-data-host" - namespace = kubernetes_namespace.ollama.metadata[0].name - nfs_server = "192.168.1.127" - nfs_path = "/srv/nfs-ssd/ollama" -} - -resource "kubernetes_persistent_volume_claim" "ollama_ui_data_proxmox" { - wait_until_bound = false - metadata { - name = "ollama-ui-data-proxmox" 
- namespace = kubernetes_namespace.ollama.metadata[0].name - annotations = { - "resize.topolvm.io/threshold" = "80%" - "resize.topolvm.io/increase" = "100%" - "resize.topolvm.io/storage_limit" = "5Gi" - } - } - spec { - access_modes = ["ReadWriteOnce"] - storage_class_name = "proxmox-lvm" - resources { - requests = { - storage = "1Gi" - } - } - } -} - -# resource "helm_release" "ollama" { -# namespace = kubernetes_namespace.ollama.metadata[0].name -# name = "ollama" - -# repository = "https://otwld.github.io/ollama-helm/" -# chart = "ollama" -# atomic = true - -# values = [templatefile("${path.module}/values.yaml", {})] -# timeout = 2400 -# } - - -resource "kubernetes_deployment" "ollama" { - metadata { - name = "ollama" - namespace = kubernetes_namespace.ollama.metadata[0].name - labels = { - app = "ollama" - tier = local.tiers.gpu - } - } - spec { - replicas = 0 # Scaled down — low usage, saves resources + clears ExternalAccessDivergence alert - selector { - match_labels = { - app = "ollama" - } - } - template { - metadata { - labels = { - app = "ollama" - } - annotations = { - "diun.enable" = "true" - "diun.include_tags" = "^\\d+\\.\\d+\\.\\d+$" - } - } - spec { - node_selector = { - "gpu" = "true" - } - toleration { - key = "nvidia.com/gpu" - value = "true" - effect = "NoSchedule" - } - container { - image = "ollama/ollama:0.6.8" - name = "ollama" - env { - name = "OLLAMA_HOST" - value = "0.0.0.0:11434" - } - env { - name = "PATH" - value = "/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - } - env { - name = "OLLAMA_KEEP_ALIVE" - value = "1h" - } - - port { - container_port = 11434 - } - volume_mount { - name = "ollama-data" - mount_path = "/root/.ollama" - } - resources { - requests = { - cpu = "100m" - memory = "256Mi" - } - limits = { - memory = "256Mi" - "nvidia.com/gpu" = "1" - } - } - } - volume { - name = "ollama-data" - persistent_volume_claim { - claim_name = 
module.nfs_ollama_data_host.claim_name - } - } - } - } - } -} - -resource "kubernetes_service" "ollama" { - metadata { - name = "ollama" - namespace = kubernetes_namespace.ollama.metadata[0].name - labels = { - app = "ollama" - } - } - - spec { - selector = { - app = "ollama" - } - port { - name = "http" - port = 11434 - } - } -} - -# Allow ollama to be connected to from external apps (internal LAN only) -module "ollama-ingress" { - source = "../../modules/kubernetes/ingress_factory" - namespace = kubernetes_namespace.ollama.metadata[0].name - name = "ollama-server" - service_name = "ollama" - root_domain = "viktorbarzin.lan" - tls_secret_name = var.tls_secret_name - allow_local_access_only = true - ssl_redirect = false - port = 11434 - extra_annotations = { - "gethomepage.dev/enabled" = "false" - } -} - -# Ollama API ingress for external access (basicAuth protected) -locals { - ollama_api_htpasswd = join("\n", [for name, pass in local.api_credentials : "${name}:${bcrypt(pass, 10)}"]) -} - -resource "kubernetes_secret" "ollama_api_basic_auth" { - metadata { - name = "ollama-api-basic-auth-secret" - namespace = kubernetes_namespace.ollama.metadata[0].name - } - - data = { - auth = local.ollama_api_htpasswd - } - - type = "Opaque" - lifecycle { - ignore_changes = [data] - } -} - -resource "kubernetes_manifest" "ollama_api_basic_auth_middleware" { - manifest = { - apiVersion = "traefik.io/v1alpha1" - kind = "Middleware" - metadata = { - name = "ollama-api-basic-auth" - namespace = kubernetes_namespace.ollama.metadata[0].name - } - spec = { - basicAuth = { - secret = kubernetes_secret.ollama_api_basic_auth.metadata[0].name - } - } - } -} - -module "ollama-api-ingress" { - source = "../../modules/kubernetes/ingress_factory" - dns_type = "non-proxied" - namespace = kubernetes_namespace.ollama.metadata[0].name - name = "ollama-api" - service_name = "ollama" - root_domain = "viktorbarzin.me" - tls_secret_name = var.tls_secret_name - ssl_redirect = true - port = 11434 - 
extra_annotations = { - "traefik.ingress.kubernetes.io/router.middlewares" = "ollama-ollama-api-basic-auth@kubernetescrd,traefik-rate-limit@kubernetescrd,traefik-crowdsec@kubernetescrd" - "gethomepage.dev/enabled" = "false" - } -} - -# Web UI -resource "kubernetes_deployment" "ollama-ui" { - metadata { - name = "ollama-ui" - namespace = kubernetes_namespace.ollama.metadata[0].name - labels = { - app = "ollama-ui" - tier = local.tiers.gpu - } - } - spec { - # Disabled: reduce cluster memory pressure (2026-03-14 OOM incident) - replicas = 0 - strategy { - type = "Recreate" - } - selector { - match_labels = { - app = "ollama-ui" - } - } - template { - metadata { - labels = { - app = "ollama-ui" - } - annotations = { - "dependency.kyverno.io/wait-for" = "ollama.ollama:11434" - } - } - spec { - container { - # image = "ghcr.io/open-webui/open-webui:main" - image = "ghcr.io/open-webui/open-webui:v0.8.12" - name = "ollama-ui" - env { - name = "OLLAMA_BASE_URL" - value = "http://${var.ollama_host}:11434" - } - - port { - container_port = 8080 - } - volume_mount { - name = "data" - mount_path = "/app/backend/data" - } - resources { - requests = { - cpu = "25m" - memory = "256Mi" - } - limits = { - memory = "256Mi" - } - } - } - volume { - name = "data" - persistent_volume_claim { - claim_name = kubernetes_persistent_volume_claim.ollama_ui_data_proxmox.metadata[0].name - } - } - } - } - } -} - -resource "kubernetes_service" "ollama-ui" { - metadata { - name = "ollama-ui" - namespace = kubernetes_namespace.ollama.metadata[0].name - labels = { - app = "dashy" - } - } - - spec { - selector = { - app = "ollama-ui" - } - port { - name = "http" - port = 80 - target_port = 8080 - } - } -} - -module "ingress" { - source = "../../modules/kubernetes/ingress_factory" - dns_type = "proxied" - namespace = kubernetes_namespace.ollama.metadata[0].name - name = "ollama" - service_name = "ollama-ui" - tls_secret_name = var.tls_secret_name - port = 80 - extra_annotations = { - 
"gethomepage.dev/enabled" = "true" - "gethomepage.dev/name" = "Ollama" - "gethomepage.dev/description" = "Local LLM inference" - "gethomepage.dev/icon" = "ollama.png" - "gethomepage.dev/group" = "AI & Data" - "gethomepage.dev/pod-selector" = "" - } -} diff --git a/stacks/ollama/providers.tf b/stacks/ollama/providers.tf deleted file mode 100644 index b337a2e9..00000000 --- a/stacks/ollama/providers.tf +++ /dev/null @@ -1,33 +0,0 @@ -# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa -terraform { - required_providers { - vault = { - source = "hashicorp/vault" - version = "~> 4.0" - } - cloudflare = { - source = "cloudflare/cloudflare" - version = "~> 4" - } - } -} - -variable "kube_config_path" { - type = string - default = "~/.kube/config" -} - -provider "kubernetes" { - config_path = var.kube_config_path -} - -provider "helm" { - kubernetes = { - config_path = var.kube_config_path - } -} - -provider "vault" { - address = "https://vault.viktorbarzin.me" - skip_child_token = true -} diff --git a/stacks/ollama/secrets b/stacks/ollama/secrets deleted file mode 120000 index ca54a7cf..00000000 --- a/stacks/ollama/secrets +++ /dev/null @@ -1 +0,0 @@ -../../secrets \ No newline at end of file diff --git a/stacks/ollama/terragrunt.hcl b/stacks/ollama/terragrunt.hcl deleted file mode 100644 index f4c920ab..00000000 --- a/stacks/ollama/terragrunt.hcl +++ /dev/null @@ -1,13 +0,0 @@ -include "root" { - path = find_in_parent_folders() -} - -dependency "platform" { - config_path = "../platform" - skip_outputs = true -} - -dependency "vault" { - config_path = "../vault" - skip_outputs = true -} diff --git a/stacks/ollama/values.yaml b/stacks/ollama/values.yaml deleted file mode 100644 index fe92927a..00000000 --- a/stacks/ollama/values.yaml +++ /dev/null @@ -1,28 +0,0 @@ -ollama: - gpu: - # -- Enable GPU integration - enabled: true - - # -- GPU type: 'nvidia' or 'amd' - type: "nvidia" - - # -- Specify the number of GPU to 1 - number: 1 - - # -- List of models to pull at container 
startup - models: - pull: - - llama3 - -persistentVolume: - enabled: true - existingClaim: "ollama-pvc" - -nodeSelector: - gpu: "true" - -tolerations: - - key: "nvidia.com/gpu" - operator: "Equal" - value: "true" - effect: "NoSchedule"