fix: restore tree dropped by 6d224861; land stem95su gdrive-sync (10m) [ci skip]

6d224861 came from a --no-checkout worktree whose empty index made the commit drop every file except two. This restores 05b50d2b's full tree and correctly adds stacks/stem95su/gdrive-sync.tf + the service-catalog stem95su entry. Forward-only (parent=6d224861, no force-push); [ci skip] since the live infra was never applied from the broken commit. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-09 08:45:33 +00:00 · 2026-06-09 08:45:33 +00:00 · fd0f4a0365
commit fd0f4a0365
parent 6d224861c4
1166 changed files with 358546 additions and 0 deletions
--- a/stacks/nextcloud/.terraform.lock.hcl
+++ b/stacks/nextcloud/.terraform.lock.hcl
@ -0,0 +1,152 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/cloudflare/cloudflare" {
+  version     = "4.52.7"
+  constraints = "~> 4.0"
+  hashes = [
+    "h1:pPItIWii5oymR+geZB219ROSPuSODPLTlM4S/u8xLvM=",
+    "zh:0c904ce31a4c6c4a5b3bf7ff1560e77c0cc7e2450c8553ded8e8c90398e1418b",
+    "zh:36183d310c36373fe4cb936b83c595c6fd3b0a94bc7827f28e5789ccbf59752e",
+    "zh:556a568a6f0235e8f41647de9e4d3a1e7b1d6502df8b19b54ec441f1c653ea10",
+    "zh:633ebbd5b0245e75e500ef9be4d9e62288f97e8da3baaa51323892a786d90285",
+    "zh:6acfe60cf52a65ba8f044f748548d2119e7f4fd7f8ebcb14698960d87c68f529",
+    "zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
+    "zh:904acc31ebb9d6ef68c792074b30532ee61bf515f19e0a3c75b46f126cca1f13",
+    "zh:a1d0a81246afc8750286d3f6fe7a8fbe6460dd2662407b28dbfbabb612e5fa9d",
+    "zh:a41a36fe253fc365fe2b7ffc749624688b2693b4634862fda161179ab100029f",
+    "zh:a7ef269e77ffa8715c8945a2c14322c7ff159ea44c15f62505f3cbb2cae3b32d",
+    "zh:b01aa3bed30610633b762df64332b26f8844a68c3960cebcb30f04918efc67fe",
+    "zh:b069cc2cd18cae10757df3ae030508eac8d55de7e49eda7a5e3e11f2f7fe6455",
+    "zh:b2d2c6313729ebb7465dceece374049e2d08bda34473901be9ff46a8836d42b2",
+    "zh:db0e114edaf4bc2f3d4769958807c83022bfbc619a00bdf4c4bd17faa4ab2d8b",
+    "zh:ecc0aa8b9044f664fd2aaf8fa992d976578f78478980555b4b8f6148e8d1a5fe",
+  ]
+}
+
+provider "registry.terraform.io/gavinbunney/kubectl" {
+  version     = "1.19.0"
+  constraints = "~> 1.14"
+  hashes = [
+    "h1:9QkxPjp0x5FZFfJbE+B7hBOoads9gmdfj9aYu5N4Sfc=",
+    "zh:1dec8766336ac5b00b3d8f62e3fff6390f5f60699c9299920fc9861a76f00c71",
+    "zh:43f101b56b58d7fead6a511728b4e09f7c41dc2e3963f59cf1c146c4767c6cb7",
+    "zh:4c4fbaa44f60e722f25cc05ee11dfaec282893c5c0ffa27bc88c382dbfbaa35c",
+    "zh:51dd23238b7b677b8a1abbfcc7deec53ffa5ec79e58e3b54d6be334d3d01bc0e",
+    "zh:5afc2ebc75b9d708730dbabdc8f94dd559d7f2fc5a31c5101358bd8d016916ba",
+    "zh:6be6e72d4663776390a82a37e34f7359f726d0120df622f4a2b46619338a168e",
+    "zh:72642d5fcf1e3febb6e5d4ae7b592bb9ff3cb220af041dbda893588e4bf30c0c",
+    "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
+    "zh:a1da03e3239867b35812ee031a1060fed6e8d8e458e2eaca48b5dd51b35f56f7",
+    "zh:b98b6a6728fe277fcd133bdfa7237bd733eae233f09653523f14460f608f8ba2",
+    "zh:bb8b071d0437f4767695c6158a3cb70df9f52e377c67019971d888b99147511f",
+    "zh:dc89ce4b63bfef708ec29c17e85ad0232a1794336dc54dd88c3ba0b77e764f71",
+    "zh:dd7dd18f1f8218c6cd19592288fde32dccc743cde05b9feeb2883f37c2ff4b4e",
+    "zh:ec4bd5ab3872dedb39fe528319b4bba609306e12ee90971495f109e142d66310",
+    "zh:f610ead42f724c82f5463e0e71fa735a11ffb6101880665d93f48b4a67b9ad82",
+  ]
+}
+
+provider "registry.terraform.io/goauthentik/authentik" {
+  version     = "2024.12.1"
+  constraints = "~> 2024.10"
+  hashes = [
+    "h1:roBMd+gi+TGgikH/bMzEI8JfvJiMAQWt+8FmokCrQIs=",
+    "zh:090260dc7889ea822ec1d899344e1ee23eba5290461989c0796149c9511f2316",
+    "zh:13c2655ff824b0dc4b9bb832b5ca6d41dba97cb280330258c5fef4115e236209",
+    "zh:166a73c3a810c9c895d68a8ff968158f339f8a2c1c03e20ec9fc5ed99cc64e20",
+    "zh:203777eae1cdc711233315499643180604cff2324411b186b7cf07fdbe16f655",
+    "zh:3b2f18c9a8d28dac74dc6bbf168c946855ab9c68f053578d4630c50d5eaf30a0",
+    "zh:4822275985f6b74b6196c47112316a4252db22cf4ceaef7c9ab4c66d488abf2f",
+    "zh:53ea97562666c8a5a2f6d63d418a302a7f8ee4b7bb7da35dedaa89aa5708b7f0",
+    "zh:56b8a230901e3550c92a1d3f58ee9dafe9853f30fe4315af3ab28ae63262e15d",
+    "zh:6293ab7b1fd8206a0c853591f50186aca4a1eff117b2a773e10760a23a2c83e9",
+    "zh:9433970f79fb92d8aae3ee436db5630ab312c78b6dc9df9c1db3273a18f8aaa1",
+    "zh:95df406214f79b3b98222d7c7fe8fc319a3d90b7a9d53e1d5abbda5dfb8b9436",
+    "zh:a85880da0552a42c8f449390fbd7d8b03541d1a13e04bba9f1404fa658754260",
+    "zh:a95f6e9bd62c67e70eba1b1a14728856b9a6a28cd1e5e3be54a7718882c87e7f",
+    "zh:dd599b51c5beb34a4c6feece244fde07d2558d69929449ab1fd39a5ebe738781",
+  ]
+}
+
+provider "registry.terraform.io/hashicorp/helm" {
+  version = "3.1.1"
+  hashes = [
+    "h1:47CqNwkxctJtL/N/JuEj+8QMg8mRNI/NWeKO5/ydfZU=",
+    "h1:5b2ojWKT0noujHiweCds37ZreRFRQLNaErdJLusJN88=",
+    "zh:1a6d5ce931708aec29d1f3d9e360c2a0c35ba5a54d03eeaff0ce3ca597cd0275",
+    "zh:3411919ba2a5941801e677f0fea08bdd0ae22ba3c9ce3309f55554699e06524a",
+    "zh:81b36138b8f2320dc7f877b50f9e38f4bc614affe68de885d322629dd0d16a29",
+    "zh:95a2a0a497a6082ee06f95b38bd0f0d6924a65722892a856cfd914c0d117f104",
+    "zh:9d3e78c2d1bb46508b972210ad706dd8c8b106f8b206ecf096cd211c54f46990",
+    "zh:a79139abf687387a6efdbbb04289a0a8e7eaca2bd91cdc0ce68ea4f3286c2c34",
+    "zh:aaa8784be125fbd50c48d84d6e171d3fb6ef84a221dbc5165c067ce05faab4c8",
+    "zh:afecd301f469975c9d8f350cc482fe656e082b6ab0f677d1a816c3c615837cc1",
+    "zh:c54c22b18d48ff9053d899d178d9ffef7d9d19785d9bf310a07d648b7aac075b",
+    "zh:db2eefd55aea48e73384a555c72bac3f7d428e24147bedb64e1a039398e5b903",
+    "zh:ee61666a233533fd2be971091cecc01650561f1585783c381b6f6e8a390198a4",
+    "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+  ]
+}
+
+provider "registry.terraform.io/hashicorp/kubernetes" {
+  version = "3.1.0"
+  hashes = [
+    "h1:oodIAuFMikXNmEtil5MQgP4dfSctUBYQiGJfjbsF3NY=",
+    "zh:0215c5c60be62028c09a2f22458e89cda3ef5830a632299f1d401eb3538874b0",
+    "zh:09ebb9f442431e278a310a9423f32caf467cb4b3cad3fe59573ca71fa7b14e20",
+    "zh:0c4e5912f83bb35846ae0a9ae54fc320706ee61894cd21cc6b4181b1c5a2fa5c",
+    "zh:1678c982853ad461e65ccb5e79d585e13ed109dd47dab2a66d3a7a304faeef65",
+    "zh:1c050a5c15e330457a9c18caacf61a923c59d663e13f2962e4b32f04fef523a0",
+    "zh:2c55bcec83be58ec132c7cb0a1ac644758b800d794fdc636d53a0eada0358a3a",
+    "zh:a062bb0aa316c08d8460c66a5d68da71da40de5d3bc3b31abcf3a1a9a19650f1",
+    "zh:a26fdea0afaa9b247c73c0b42843ca51ba7db0ac2571f9d3d50dcabd20ca1b98",
+    "zh:c872c9385a78d502bf5823d61cd3bb0f9a0585030e025eb12585c83451beeaa1",
+    "zh:f180879af931182beee4c8c0d9dab62b81d86f17ddcbe3786ef4c7cec9163a4e",
+    "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+    "zh:f70f5789264069e0eef06f9b5d5fde955ef7206f7d446d1ce51a4c37a3f3e02f",
+  ]
+}
+
+provider "registry.terraform.io/hashicorp/vault" {
+  version     = "4.8.0"
+  constraints = "~> 4.0"
+  hashes = [
+    "h1:GPfhH6dr1LY0foPBDYv9bEGifx7eSwYqFcEAOWOUxLk=",
+    "h1:aHqgWQhDBMeZO9iUKwJYMlh4q+xNMUlMIcjRbF4d02Y=",
+    "zh:269ab13433f67684012ae7e15876532b0312f5d0d2002a9cf9febb1279ce5ea6",
+    "zh:4babc95bf0c40eb85005db1dc2ca403c46be4a71dd3e409db3711a56f7a5ca0e",
+    "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3",
+    "zh:86e27c1c625ecc24446a11eeffc3ac319b36c2b4e51251db8579256a0dbcf136",
+    "zh:a32f31da94824009e26b077374440b52098aecb93c92ff55dc3d31dd37c4ea25",
+    "zh:be0a18c6c0425518bab4fbffd82078b82036a88503b5d76064de551c9f646cbf",
+    "zh:be5a77fdfd36863ebeec79cd12b1d13322ffad6821d157a0b279789fa06b5937",
+    "zh:be8317d142a3caad74c7d936039ae27076a1b2b8312ef5208e2871a5f525977c",
+    "zh:c94a84895a3d9954b80e983eed4603330a5cdbbd8eef5b3c99278c2d1402ef3c",
+    "zh:de1fb712784dd8415f011ca5346a34f87fab6046c730557615247e511dbc7d98",
+    "zh:e3eafae7da550f86cae395d6660b2a0e93ec8d2b0e0e5ef982ec762e961fc952",
+    "zh:ff35fb1ab6add288f0f368981e56f780b50405accd1937131cba1137999c8d83",
+  ]
+}
+
+provider "registry.terraform.io/telmate/proxmox" {
+  version     = "3.0.2-rc07"
+  constraints = "3.0.2-rc07"
+  hashes = [
+    "h1:zp5hpQJQ4t4zROSLqdltVpBO+Riy9VugtfFbpyTw1aM=",
+    "zh:2ee860cd0a368b3eaa53f4a9ea46f16dab8a97929e813ea6ef55183f8112c2ca",
+    "zh:415965fd915bae2040d7f79e45f64d6e3ae61149c10114efeac1b34687d7296c",
+    "zh:6584b2055df0e32062561c615e3b6b2c291ca8c959440adda09ef3ec1e1436bd",
+    "zh:65dcfad71928e0a8dd9befc22524ed686be5020b0024dc5cca5184c7420eeb6b",
+    "zh:7253dc29bd265d33f2791ac4f779c5413f16720bb717de8e6c5fcb2c858648ea",
+    "zh:7ec8993da10a47606670f9f67cfd10719a7580641d11c7aa761121c4a2bd66fb",
+    "zh:999a3f7a9dcf517967fc537e6ec930a8172203642fb01b8e1f78f908373db210",
+    "zh:a50e6df7280eb6584a5fd2456e3f5b6df13b2ec8a7fa4605511e438e1863be42",
+    "zh:b25b329a1e42681c509d027fee0365414f0cc5062b65690cfc3386aab16132ae",
+    "zh:c028877fdb438ece48f7bc02b65bbae9ca7b7befbd260e519ccab6c0cbb39f26",
+    "zh:cf0eaa3ea9fcc6d62793637947f1b8d7c885b6ad74695ab47e134e4ff132190f",
+    "zh:d5ade3fae031cc629b7c512a7b60e46570f4c41665e88a595d7efd943dde5ab2",
+    "zh:f388c15ad1ecfc09e7361e3b98bae9b627a3a85f7b908c9f40650969c949901c",
+    "zh:f415cc6f735a3971faae6ac24034afdb9ee83373ef8de19a9631c187d5adc7db",
+  ]
+}
--- a/stacks/nextcloud/chart_values.yaml
+++ b/stacks/nextcloud/chart_values.yaml
@ -0,0 +1,191 @@
+# image.tag is rendered dynamically (templatefile var `image_tag`) from the
+# CURRENT live Deployment tag, falling back to var.nextcloud_image_tag_floor
+# (32.0.9) on fresh install / DR — see stacks/nextcloud/main.tf (the plural
+# `kubernetes_resources` live-tag data source + locals). This makes helm upgrades
+# image-no-ops in steady state and means a re-render can NEVER downgrade below
+# the Keel-bumped live tag (the 2026-06-01 CrashLoop: a pinned 32.0.3 lost to
+# live 32.0.9 and Nextcloud refused the downgrade). Keel (keel.sh/policy=minor)
+# bumps the live tag upward within major 32; the next apply just follows it.
+# flavor=apache renders the bare apache-default tag (live image is
+# `nextcloud:<tag>`, no -apache suffix).
+image:
+  flavor: apache
+  tag: "${image_tag}"
+
+nextcloud:
+  host: nextcloud.viktorbarzin.me
+  trustedDomains:
+    - nextcloud.viktorbarzin.me
+  # mail:
+  #   enabled: true
+  #   # the user we send email as
+  #   fromAddress: nextcloud@viktorbarzin.me
+  #   # the domain we send email from
+  #   domain: viktorbarzin.me
+  #   smtp:
+  #     host: mail.viktorbarzin.me
+  #     secure: starttls
+  #     port: 587
+  #     authtype: LOGIN
+  #     name: nextcloud@viktorbarzin.me
+  #     password:
+  extraEnv:
+    - name: TRUSTED_PROXIES
+      value: "10.0.0.0/8"
+    - name: PHP_MEMORY_LIMIT
+      value: "512M"
+    - name: PHP_UPLOAD_LIMIT
+      value: "16G"
+    # - name: mail_smtpdebug
+    #   value: "true"
+    # - name: loglevel
+    #   value: "0"
+  configs:
+    zzz-redis.config.php: |
+      <?php
+      // Redis via HAProxy master-only service. HAProxy (3 replicas, PDB
+      // minAvailable=2) health-checks all v2 pods via `INFO replication` and
+      // routes to the current role:master. Sentinel failover takes <30s, and
+      // HAProxy detects the new master via its 1s tcp-check interval and
+      // starts routing within ~3s of detection. Removed the old in-process
+      // sentinel-query loop on 2026-04-19 after the Redis rework — see
+      // beads code-v2b and infra/docs/architecture/databases.md.
+      $CONFIG = array(
+        'memcache.distributed' => '\\OC\\Memcache\\Redis',
+        'memcache.locking' => '\\OC\\Memcache\\Redis',
+        'redis' => array(
+          'host' => 'redis-master.redis.svc.cluster.local',
+          'port' => 6379,
+          'password' => '',
+          'timeout' => 1.5,
+          'read_timeout' => 1.5,
+        ),
+      );
+    performance.config.php: |
+      <?php
+      $CONFIG = array(
+        'loglevel' => 2,
+        // Cap + rotate nextcloud.log. Without this it grew unbounded to
+        // 10GB+ and bloated every backup (2026-06-01 space incident).
+        // At 10MB the log rotates to nextcloud.log.1 (1 kept) → ~20MB max.
+        'log_rotate_size' => 10485760,
+        'mail_smtpdebug' => false,
+      );
+    zzz-mysql.config.php: |
+      <?php
+      $CONFIG = array(
+        'mysql.utf8mb4' => true,
+      );
+  phpConfigs:
+    zzz-custom.ini: |
+      max_execution_time = 300
+      max_input_time = 300
+      default_socket_timeout = 300
+      opcache.enable_file_override = 1
+      apc.shm_size = 128M
+  extraVolumes:
+    - name: apache-tuning
+      configMap:
+        name: nextcloud-apache-tuning
+    - name: db-password-sync
+      configMap:
+        name: nextcloud-db-password-sync
+        defaultMode: 0755
+    - name: pve-nfs
+      persistentVolumeClaim:
+        claimName: nextcloud-pve-nfs-root
+    - name: pve-nfs-ssd
+      persistentVolumeClaim:
+        claimName: nextcloud-pve-nfs-ssd-root
+  extraVolumeMounts:
+    - name: apache-tuning
+      mountPath: /etc/apache2/mods-available/mpm_prefork.conf
+      subPath: mpm_prefork.conf
+    - name: db-password-sync
+      mountPath: /docker-entrypoint-hooks.d/before-starting
+    - name: pve-nfs
+      mountPath: /mnt/pve-nfs
+    - name: pve-nfs-ssd
+      mountPath: /mnt/pve-nfs-ssd
+
+internalDatabase:
+  enabled: false
+
+externalRedis:
+  enabled: false
+
+externalDatabase:
+  enabled: true
+  type: mysql
+  host: ${mysql_host}
+  user: nextcloud
+  database: nextcloud
+  existingSecret:
+    secretName: nextcloud-db-creds
+    usernameKey: db-username
+    passwordKey: DB_PASSWORD
+
+persistence:
+  enabled: true
+  existingClaim: nextcloud-data-encrypted
+
+  accessMode: ReadWriteOnce
+  size: 20Gi
+
+startupProbe:
+  enabled: true
+  initialDelaySeconds: 30
+  periodSeconds: 10
+  timeoutSeconds: 30
+  failureThreshold: 60
+  successThreshold: 1
+
+livenessProbe:
+  enabled: true
+  initialDelaySeconds: 30
+  periodSeconds: 60
+  timeoutSeconds: 30
+  failureThreshold: 10
+  successThreshold: 1
+
+readinessProbe:
+  enabled: true
+  initialDelaySeconds: 30
+  periodSeconds: 60
+  timeoutSeconds: 30
+  failureThreshold: 5
+  successThreshold: 1
+
+podAnnotations:
+  diun.enable: "true"
+  diun.include_tags: "^[0-9]+(?:.[0-9]+)?(?:.[0-9]+)?.*"
+  dependency.kyverno.io/wait-for: "mysql.dbaas:3306,redis-master.redis:6379"
+  secret.reloader.stakater.com/reload: "nextcloud-db-creds"
+
+# OnRootMismatch: kubelet only recursively chowns the volume to fsGroup if the
+# root dir's GID doesn't already match. Without this, every pod restart triggers
+# a ~30-min recursive chown of /srv/nfs and /srv/nfs-ssd (600k+ files) — the
+# default policy "Always" recurses every time. Locks fsGroup=33 explicitly so
+# this block fully replaces the chart's default {fsGroup: 33}.
+securityContext:
+  fsGroup: 33
+  fsGroupChangePolicy: OnRootMismatch
+
+collabora:
+  enabled: false # Using onlyoffice instead
+
+resources:
+  limits:
+    memory: 8Gi
+  requests:
+    cpu: 50m
+    memory: 256Mi
+
+cronjob:
+  enabled: true
+  resources:
+    limits:
+      memory: 384Mi
+    requests:
+      cpu: 25m
+      memory: 384Mi
--- a/stacks/nextcloud/external_storage.tf
+++ b/stacks/nextcloud/external_storage.tf
@ -0,0 +1,322 @@
+# Nextcloud Files External bootstrap — mount-per-archive + applicable_users model.
+# Creates two admin-only root browser mounts (PVE NFS Pool, PVE NFS-SSD Pool)
+# pointing at the NFS roots mounted at /mnt/pve-nfs and /mnt/pve-nfs-ssd inside
+# the Nextcloud container, plus per-archive mounts visible only to the named
+# users. Safe to re-run — the bootstrap Job is idempotent.
+#
+# ACL model (verified via context7 + NC docs):
+#   Mount visibility is controlled by `occ files_external:applicable`.
+#   A mount with no applicable users/groups is visible to ALL users — so we
+#   always set at least one applicable group (admin) or user list.
+#
+# occ commands used (syntax verified via context7):
+#   files_external:create <mountPoint> local null::null --config "datadir=<dir>"
+#   files_external:list --output=json   → array; each entry has numeric .mount_id,
+#                                          .applicable_users [], .applicable_groups []
+#   files_external:applicable <mountId> --add-user=<user>
+#   files_external:applicable <mountId> --remove-user=<user>
+#   files_external:applicable <mountId> --add-group=<group>
+#   files_external:applicable <mountId> --remove-group=<group>
+#
+# Note: `files_external:applicable` has NO --output=json flag (write-only command).
+# Current applicable state is read from files_external:list --output=json instead.
+#
+# NO Files Access Control. Drop the workflow-engine machinery entirely.
+
+# ── External storage manifest (JSON) ────────────────────────────────────────
+
+# Declarative list of Files External mounts. Rendered to manifest.json, mounted
+# at /manifest in the bootstrap Job below, and read there with jq.
+#   rootMounts[]    — one `applicableGroup` string per entry.
+#   archiveMounts[] — `applicableUsers` / `applicableGroups` lists per entry.
+# Both entry kinds carry mountPoint, dataDir and an optional enableSharing
+# (the script defaults it to false when absent).
+resource "kubernetes_config_map_v1" "nextcloud_external_storage_manifest" {
+  metadata {
+    name      = "nextcloud-external-storage-manifest"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  data = {
+    "manifest.json" = jsonencode({
+      # enableSharing: lets users right-click a folder inside the mount and
+      # share it with another NC user/group/public link. NC defaults to false
+      # for local-backend mounts; we opt-in per-mount. Currently true on the
+      # admin pool browsers (admin uses them as a "share-from picker"); false
+      # on /anca-elements (anca manages her own re-sharing inside her view).
+      rootMounts = [
+        {
+          mountPoint      = "/PVE NFS Pool"
+          dataDir         = "/mnt/pve-nfs"
+          applicableGroup = "admin"
+          enableSharing   = true
+        },
+        {
+          mountPoint      = "/PVE NFS-SSD Pool"
+          dataDir         = "/mnt/pve-nfs-ssd"
+          applicableGroup = "admin"
+          enableSharing   = true
+        },
+      ]
+      archiveMounts = [
+        {
+          mountPoint       = "/anca-elements"
+          dataDir          = "/mnt/pve-nfs/anca-elements"
+          # NC usernames (not display names): admin is Viktor, anca is Anca.
+          applicableUsers  = ["anca", "admin"]
+          applicableGroups = []
+          enableSharing    = false
+        },
+      ]
+    })
+  }
+}
+
+# ── RBAC for the bootstrap Job ───────────────────────────────────────────────
+
+# Identity the bootstrap Job's pod runs as (the Job spec references it through
+# service_account_name). It carries no permissions by itself; those come from
+# the Role bound to it via the RoleBinding in this file.
+resource "kubernetes_service_account" "nextcloud_external_storage_bootstrap" {
+  metadata {
+    name      = "nextcloud-external-storage-bootstrap"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+}
+
+# Namespaced Role for the bootstrap Job: just enough to find the Nextcloud pod,
+# wait for it to become Ready, and exec `occ` inside it.
+resource "kubernetes_role" "nextcloud_external_storage_bootstrap" {
+  metadata {
+    name      = "nextcloud-external-storage-bootstrap"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  # Read access to pods — used by `kubectl wait --for=condition=Ready` and by
+  # `kubectl get pods -l ...` in the bootstrap script.
+  rule {
+    api_groups = [""]
+    resources  = ["pods"]
+    verbs      = ["list", "get", "watch"]
+  }
+
+  # `kubectl exec` is a create on the pods/exec subresource.
+  rule {
+    api_groups = [""]
+    resources  = ["pods/exec"]
+    verbs      = ["create"]
+  }
+}
+
+# Binds the bootstrap Role to the bootstrap ServiceAccount. Both names are taken
+# from the resources' own metadata rather than repeated as literals, so a rename
+# of either stays consistent here.
+resource "kubernetes_role_binding" "nextcloud_external_storage_bootstrap" {
+  metadata {
+    name      = "nextcloud-external-storage-bootstrap"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  role_ref {
+    api_group = "rbac.authorization.k8s.io"
+    kind      = "Role"
+    name      = kubernetes_role.nextcloud_external_storage_bootstrap.metadata[0].name
+  }
+
+  subject {
+    kind      = "ServiceAccount"
+    name      = kubernetes_service_account.nextcloud_external_storage_bootstrap.metadata[0].name
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+}
+
+# ── Bootstrap Job ────────────────────────────────────────────────────────────
+
+resource "kubernetes_job_v1" "nextcloud_external_storage_bootstrap" {
+  # The bootstrap script (below) waits up to 10m for the NC pod to be Ready.
+  # kubernetes_job_v1's default create timeout is only 1m, which spuriously
+  # fails the apply whenever the NC pod takes >1m to come up — e.g. now that
+  # Keel auto-upgrades nextcloud, a bump mid-apply runs `occ upgrade` in the
+  # entrypoint and delays readiness past 1m (observed 2026-06-01). Match the
+  # script's 10m wait plus margin.
+  timeouts {
+    create = "12m"
+  }
+
+  metadata {
+    name      = "nextcloud-external-storage-bootstrap"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  spec {
+    backoff_limit              = 5
+    ttl_seconds_after_finished = 600
+
+    template {
+      metadata {}
+      spec {
+        restart_policy       = "OnFailure"
+        service_account_name = kubernetes_service_account.nextcloud_external_storage_bootstrap.metadata[0].name
+
+        container {
+          name  = "bootstrap"
+          image = "bitnami/kubectl:latest"
+
+          # bitnami/kubectl (debian-12 base) ships jq — no apt-get needed.
+          # HCL heredoc: only $${...} needs escaping; bare $VAR and $(...)
+          # are passed through unchanged by HCL. No nested heredocs used.
+          command = ["/bin/bash", "-c", <<-EOF
+            set -euo pipefail
+            trap 'echo "[bootstrap] FAIL at line $LINENO — exit $?"' ERR
+
+            MANIFEST=/manifest/manifest.json
+            NC_NS=nextcloud
+            NC_LABEL="app.kubernetes.io/name=nextcloud"
+
+            # ── 1. Wait for NC pod to be Ready ──────────────────────────────
+            echo "[bootstrap] Waiting for NC pod Ready (timeout 10m)..."
+            kubectl wait -n "$NC_NS" pod \
+              -l "$NC_LABEL" \
+              --for=condition=Ready \
+              --timeout=600s
+            echo "[bootstrap] Pod is Ready."
+
+            # ── 2. Resolve pod name ─────────────────────────────────────────
+            NC_POD=$(kubectl get pods -n "$NC_NS" -l "$NC_LABEL" \
+              -o jsonpath='{.items[0].metadata.name}')
+            echo "[bootstrap] Target pod: $NC_POD"
+
+            # ── 3. occ helper — must run as www-data ────────────────────────
+            nc_occ() {
+              kubectl exec -n "$NC_NS" "$NC_POD" -c nextcloud -- \
+                runuser -u www-data -- php /var/www/html/occ "$@"
+            }
+
+            # ── 4. Enable files_external (idempotent) ───────────────────────
+            nc_occ app:enable files_external || true
+            # NO files_accesscontrol — that app is not used in this model.
+
+            # ── 5. Helpers ──────────────────────────────────────────────────
+
+            # get_mount_id <mountPoint>
+            # Reads files_external:list --output=json (array of mount objects).
+            # Each object has a numeric "mount_id" and a string "mount_point".
+            get_mount_id() {
+              local MP="$1"
+              nc_occ files_external:list --output=json 2>/dev/null \
+                | jq -r --arg mp "$MP" \
+                    '.[] | select(.mount_point == $mp) | .mount_id' \
+                | head -1
+            }
+
+            # ensure_mount <mountPoint> <dataDir> → echoes the numeric mount id
+            #
+            # Callers capture stdout (MID=$(ensure_mount ...)), so stdout must
+            # carry ONLY the id. `occ files_external:create` prints its own
+            # "Storage created with id N" status line on stdout; it is sent to
+            # stderr here so it cannot leak into MID and break the later
+            # `jq --argjson mid` lookup on first creation.
+            #
+            # bash clears `set -e` inside command substitutions, so each step is
+            # checked explicitly: a failed list must not be mistaken for "mount
+            # absent" (that would create a duplicate mount), and a failed create
+            # must not return an empty id with status 0.
+            ensure_mount() {
+              local MP="$1" DIR="$2"
+              local MID
+              MID=$(get_mount_id "$MP") || return 1
+              if [ -z "$MID" ]; then
+                echo "[bootstrap] Creating mount '$MP' -> $DIR" >&2
+                nc_occ files_external:create "$MP" local null::null \
+                  --config "datadir=$DIR" >&2 || return 1
+                MID=$(get_mount_id "$MP") || return 1
+                if [ -z "$MID" ]; then
+                  echo "[bootstrap] Mount '$MP' not found after create" >&2
+                  return 1
+                fi
+              else
+                echo "[bootstrap] Mount '$MP' already exists (id=$MID)" >&2
+              fi
+              echo "$MID"
+            }
+
+            # sync_applicable <mountId> <desiredUsersJSON> <desiredGroupsJSON>
+            # Reads current applicable state from files_external:list --output=json
+            # (fields: applicable_users [], applicable_groups []).
+            # Diffs against desired sets; adds missing, removes extras.
+            #
+            # When no applicable users + no groups are set, NC treats the mount
+            # as visible to ALL. Two consequences are enforced here:
+            #   * the combined desired set must be non-empty, otherwise the
+            #     function refuses to run (returns 1);
+            #   * ALL additions (users, then groups) run before ANY removal, so
+            #     the applicable set never passes through "empty" while moving a
+            #     mount from user-scoped to group-scoped or back.
+            #
+            # Process substitution `< <(jq ...)` feeds the loops directly: when
+            # jq emits no rows (already-synced state), the body never runs and
+            # the loop returns 0 — avoiding a set -e exit on a no-op re-run.
+            sync_applicable() {
+              local MID="$1" DESIRED_USERS_JSON="$2" DESIRED_GROUPS_JSON="$3"
+
+              # Refuse a sync whose end state would expose the mount to everyone.
+              if ! jq -en \
+                --argjson u "$DESIRED_USERS_JSON" \
+                --argjson g "$DESIRED_GROUPS_JSON" \
+                '($u + $g) | length > 0' >/dev/null; then
+                echo "[bootstrap] Refusing to sync mount $MID: no applicable users or groups (mount would be visible to ALL)" >&2
+                return 1
+              fi
+
+              # Read current state from files_external:list --output=json
+              local MOUNT_JSON
+              MOUNT_JSON=$(nc_occ files_external:list --output=json 2>/dev/null \
+                | jq -c --argjson mid "$MID" '.[] | select(.mount_id == $mid)')
+
+              # An unknown id would leave the current sets empty strings, making
+              # every jq diff below fail inside its process substitution and the
+              # loops silently do nothing. Fail loudly instead.
+              if [ -z "$MOUNT_JSON" ]; then
+                echo "[bootstrap] Mount id $MID not present in files_external:list" >&2
+                return 1
+              fi
+
+              local CURRENT_USERS_JSON CURRENT_GROUPS_JSON
+              CURRENT_USERS_JSON=$(echo "$MOUNT_JSON" \
+                | jq -c '.applicable_users // []')
+              CURRENT_GROUPS_JSON=$(echo "$MOUNT_JSON" \
+                | jq -c '.applicable_groups // []')
+
+              # ── additions first ──
+              while IFS= read -r U; do
+                nc_occ files_external:applicable "$MID" --add-user="$U"
+              done < <(jq -rn \
+                --argjson d "$DESIRED_USERS_JSON" \
+                --argjson c "$CURRENT_USERS_JSON" \
+                '($d - $c)[]')
+
+              while IFS= read -r G; do
+                nc_occ files_external:applicable "$MID" --add-group="$G"
+              done < <(jq -rn \
+                --argjson d "$DESIRED_GROUPS_JSON" \
+                --argjson c "$CURRENT_GROUPS_JSON" \
+                '($d - $c)[]')
+
+              # ── removals last ──
+              while IFS= read -r U; do
+                nc_occ files_external:applicable "$MID" --remove-user="$U"
+              done < <(jq -rn \
+                --argjson d "$DESIRED_USERS_JSON" \
+                --argjson c "$CURRENT_USERS_JSON" \
+                '($c - $d)[]')
+
+              while IFS= read -r G; do
+                nc_occ files_external:applicable "$MID" --remove-group="$G"
+              done < <(jq -rn \
+                --argjson d "$DESIRED_GROUPS_JSON" \
+                --argjson c "$CURRENT_GROUPS_JSON" \
+                '($c - $d)[]')
+            }
+
+            # sync_option <mountId> <key> <value>
+            # Reconciles a single mount option. occ files_external:option is
+            # idempotent (no error on setting same value), so we always write.
+            sync_option() {
+              nc_occ files_external:option "$1" "$2" "$3" >/dev/null
+            }
+
+            # ── 6. Process root mounts (admin group only) ───────────────────
+            ROOT_COUNT=$(jq '.rootMounts | length' "$MANIFEST")
+            for i in $(seq 0 $((ROOT_COUNT - 1))); do
+              MP=$(jq -r ".rootMounts[$i].mountPoint" "$MANIFEST")
+              DIR=$(jq -r ".rootMounts[$i].dataDir" "$MANIFEST")
+              GROUP=$(jq -r ".rootMounts[$i].applicableGroup" "$MANIFEST")
+              ENABLE_SHARING=$(jq -r ".rootMounts[$i].enableSharing // false" "$MANIFEST")
+              MID=$(ensure_mount "$MP" "$DIR")
+              sync_applicable "$MID" '[]' "[\"$GROUP\"]"
+              sync_option "$MID" enable_sharing "$ENABLE_SHARING"
+            done
+
+            # ── 7. Process archive mounts (per-user / per-group) ───────────
+            ARCH_COUNT=$(jq '.archiveMounts | length' "$MANIFEST")
+            for i in $(seq 0 $((ARCH_COUNT - 1))); do
+              MP=$(jq -r ".archiveMounts[$i].mountPoint" "$MANIFEST")
+              DIR=$(jq -r ".archiveMounts[$i].dataDir" "$MANIFEST")
+              USERS_JSON=$(jq -c ".archiveMounts[$i].applicableUsers // []" "$MANIFEST")
+              GROUPS_JSON=$(jq -c ".archiveMounts[$i].applicableGroups // []" "$MANIFEST")
+              ENABLE_SHARING=$(jq -r ".archiveMounts[$i].enableSharing // false" "$MANIFEST")
+              MID=$(ensure_mount "$MP" "$DIR")
+              sync_applicable "$MID" "$USERS_JSON" "$GROUPS_JSON"
+              sync_option "$MID" enable_sharing "$ENABLE_SHARING"
+            done
+
+            echo "[bootstrap] Bootstrap complete."
+          EOF
+          ]
+
+          volume_mount {
+            name       = "manifest"
+            mount_path = "/manifest"
+          }
+        }
+
+        volume {
+          name = "manifest"
+          config_map {
+            name = kubernetes_config_map_v1.nextcloud_external_storage_manifest.metadata[0].name
+          }
+        }
+      }
+    }
+  }
+
+  depends_on = [helm_release.nextcloud]
+
+  lifecycle {
+    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
+    ignore_changes = [spec[0].template[0].spec[0].dns_config]
+  }
+}
--- a/stacks/nextcloud/main.tf
+++ b/stacks/nextcloud/main.tf
@ -0,0 +1,729 @@
+# Name of the TLS secret; forwarded to the tls_secret and ingress modules and
+# to the chart values template. Marked sensitive.
+variable "tls_secret_name" {
+  sensitive = true
+  type      = string
+}
+
+# Plain string host inputs. mysql_host feeds the chart values template.
+# NOTE(review): nfs_server and redis_host are not referenced in the visible
+# part of this file — confirm they are consumed elsewhere in the stack.
+variable "nfs_server" {
+  type = string
+}
+variable "redis_host" {
+  type = string
+}
+variable "mysql_host" {
+  type = string
+}
+
+# FLOOR only. Keel moves the LIVE image tag upward on its own; the
+# kubernetes_resources data source in this file renders the current live tag
+# so a helm apply never downgrades below what Keel installed. This floor only
+# wins on a fresh install / DR (no live Deployment) or after deliberately
+# restoring an OLDER DB snapshot (bump this value to match in that case).
+variable "nextcloud_image_tag_floor" {
+  default = "32.0.9"
+  type    = string
+}
+
+# Vault KV v2 entry "nextcloud" under the "secret" mount; locals decode its
+# homepage_credentials field.
+data "vault_kv_secret_v2" "secrets" {
+  name  = "nextcloud"
+  mount = "secret"
+}
+
+# Render the CURRENT live image tag so helm upgrades are image-no-ops and
+# can NEVER downgrade below the Keel-bumped live tag (failure mode F2: the
+# 2026-06-01 CrashLoop where a pinned 32.0.3 re-render lost to live 32.0.9).
+# Helm-managed workloads can't use the raw-Deployment KEEL_IGNORE_IMAGE
+# `lifecycle.ignore_changes` trick (immich/freshrss main.tf), so we feed the
+# live tag back into the chart instead.
+#
+# Use the PLURAL `kubernetes_resources` (field-selected to name=nextcloud), NOT
+# the singular `kubernetes_resource`: in kubernetes provider 3.1.0 the singular
+# data source ERRORS ("Provider produced null object") when the target is
+# absent, and try() can't rescue it (the failure is at the provider read, not
+# the expression). The plural returns an empty `objects` list on no match, so
+# objects[0] + try() cleanly falls back to var.nextcloud_image_tag_floor on
+# fresh install / DR. (Verified empirically against provider 3.1.0.)
+#
+# namespace is the LITERAL "nextcloud", NOT
+# kubernetes_namespace.nextcloud.metadata[0].name, on purpose: referencing the
+# namespace resource makes Terraform defer this data read to apply time
+# whenever the namespace has a pending change (e.g. the keel.sh/enrolled label
+# add) — "(depends on a resource ... with changes pending)" — which leaves the
+# tag unknown at plan, turning every helm plan into an unverifiable
+# (known after apply) values churn. A static namespace decouples the read so it
+# resolves at plan time.
+data "kubernetes_resources" "nextcloud_live" {
+  kind           = "Deployment"
+  api_version    = "apps/v1"
+  field_selector = "metadata.name=nextcloud"
+  # Literal on purpose (not the namespace resource) so the read is not
+  # deferred to apply time.
+  namespace = "nextcloud"
+}
+
+locals {
+  # homepage_credentials is stored in Vault as a JSON string; decode it once.
+  homepage_credentials = jsondecode(data.vault_kv_secret_v2.secrets.data["homepage_credentials"])
+
+  # Image of the first container of the live Deployment, or "" when the data
+  # source matched nothing (fresh install / DR).
+  _live_image = try(data.kubernetes_resources.nextcloud_live.objects[0].spec.template.spec.containers[0].image, "")
+
+  # Extract the tag from the image reference:
+  #   - `[^@]*:` backtracks to the last ":" before any "@digest" part;
+  #   - the tag itself may not contain "/" so a registry port
+  #     (registry:5000/repo, no tag) is never mistaken for a tag;
+  #   - an optional trailing "@sha256:..." digest is tolerated and ignored.
+  # regex() errors when nothing matches (empty, untagged or digest-only
+  # image) and try() turns that into "", which selects the floor below.
+  # The optional `-apache` flavor suffix is stripped so the value round-trips
+  # through the chart's `image.flavor=apache` (which renders the bare
+  # apache-default tag).
+  _live_tag = try(trimsuffix(regex("^[^@]*:([^:/@]+)(?:@.*)?$", local._live_image)[0], "-apache"), "")
+
+  # Live tag when one could be read, otherwise the configured floor.
+  nextcloud_image_tag = local._live_tag != "" ? local._live_tag : var.nextcloud_image_tag_floor
+}
+
+
+# Hands the namespace and TLS secret name to the shared setup_tls_secret module.
+module "tls_secret" {
+  source = "../../modules/kubernetes/setup_tls_secret"
+
+  tls_secret_name = var.tls_secret_name
+  namespace       = kubernetes_namespace.nextcloud.metadata[0].name
+}
+
+resource "kubernetes_namespace" "nextcloud" {
+  metadata {
+    name = "nextcloud"
+    labels = {
+      # Keel enrollment. Re-enabled 2026-06-01 (it had been disabled after the
+      # 2026-05-26 bump 32.0.3→32.0.9 stuck the pod in maintenance mode for
+      # ~22h). Two safeguards make auto-upgrade safe, one per failure mode:
+      #   F1 — interrupted `occ upgrade` (entrypoint copies version.php before
+      #        occ upgrade finishes, so a probe-restart mid-upgrade leaves the
+      #        DB half-migrated → 503): the nextcloud-watchdog CronJob in this
+      #        file self-heals by running `occ upgrade` when occ reports
+      #        needsDbUpgrade=true.
+      #   F2 — helm re-renders a tag BELOW the Keel-bumped live image →
+      #        Nextcloud refuses the downgrade → CrashLoop (the 2026-06-01
+      #        incident): chart_values renders the live tag with a floor, so a
+      #        re-render is never below live.
+      # Scope: the shared Kyverno `inject-keel-annotations` policy stamps
+      # keel.sh/policy=patch (+ trigger=poll + pollSchedule) on enrolled
+      # workloads. For Nextcloud patch == minor in practice — it only ships
+      # 32.0.x maintenance releases (never 32.1.x), and major 33 needs `major`
+      # policy and stays manual (the entrypoint's +1-major limit enforces that
+      # anyway). The policy is deliberately NOT overridden per-workload.
+      "keel.sh/enrolled" = "true"
+
+      "istio-injection"                       = "disabled"
+      "resource-governance/custom-limitrange" = "true"
+      "resource-governance/custom-quota"      = "true"
+      tier                                    = local.tiers.edge
+    }
+  }
+
+  lifecycle {
+    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
+    ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
+  }
+}
+
+# No per-workload Keel override resources here, on purpose. Nextcloud is
+# enrolled via the namespace label above; the shared Kyverno
+# `inject-keel-annotations` policy then stamps keel.sh/policy=patch +
+# trigger=poll + pollSchedule, and Keel auto-upgrades within 32.0.x.
+#
+# This stack used to carry kubernetes_labels + kubernetes_annotations
+# resources forcing keel.sh/policy=minor (and before that =never, for the
+# opt-out). Both were removed 2026-06-01 after re-enabling Keel because each
+# produced perpetual drift:
+#   - Kyverno's background-controller overwrites a TF-set policy back to
+#     `patch` despite the policy's `+(keel.sh/policy)` add-if-missing anchor
+#     (observed live: the annotation's field manager was background-controller
+#     with value patch right after a Keel-bump admission).
+#   - The helm release strips the deployment's keel.sh/policy LABEL on every
+#     roll, so TF re-added it on every apply.
+# patch == minor for Nextcloud (32.0.x only; major 33 needs `major` and stays
+# manual), so letting Kyverno own the keel annotations — exactly like every
+# other enrolled workload (immich, freshrss) — is both correct and drift-free.
+
+# ExternalSecret that extracts every key of the "nextcloud" entry from the
+# vault-kv ClusterSecretStore into the nextcloud-secrets Secret.
+resource "kubernetes_manifest" "external_secret" {
+  depends_on = [kubernetes_namespace.nextcloud]
+
+  manifest = {
+    apiVersion = "external-secrets.io/v1beta1"
+    kind       = "ExternalSecret"
+    metadata = {
+      namespace = "nextcloud"
+      name      = "nextcloud-secrets"
+    }
+    spec = {
+      refreshInterval = "15m"
+      secretStoreRef = {
+        kind = "ClusterSecretStore"
+        name = "vault-kv"
+      }
+      target = {
+        name = "nextcloud-secrets"
+      }
+      dataFrom = [
+        {
+          extract = {
+            key = "nextcloud"
+          }
+        }
+      ]
+    }
+  }
+}
+
+# DB credentials from the Vault database engine (rotated every 24h).
+# The Nextcloud Helm chart reads the password at runtime through an
+# existingSecret reference. The target template renders DB_PASSWORD from the
+# fetched "password" value and a fixed db-username.
+resource "kubernetes_manifest" "db_external_secret" {
+  depends_on = [kubernetes_namespace.nextcloud]
+
+  manifest = {
+    apiVersion = "external-secrets.io/v1beta1"
+    kind       = "ExternalSecret"
+    metadata = {
+      namespace = "nextcloud"
+      name      = "nextcloud-db-creds"
+    }
+    spec = {
+      refreshInterval = "15m"
+      secretStoreRef = {
+        kind = "ClusterSecretStore"
+        name = "vault-database"
+      }
+      data = [
+        {
+          secretKey = "password"
+          remoteRef = {
+            key      = "static-creds/mysql-nextcloud"
+            property = "password"
+          }
+        }
+      ]
+      target = {
+        name = "nextcloud-db-creds"
+        template = {
+          data = {
+            db-username = "nextcloud"
+            DB_PASSWORD = "{{ .password }}"
+          }
+        }
+      }
+    }
+  }
+}
+
+# Namespace-wide hard caps on CPU/memory requests, memory limits and pod count.
+resource "kubernetes_resource_quota" "nextcloud" {
+  metadata {
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = "nextcloud-quota"
+  }
+
+  spec {
+    hard = {
+      pods              = "10"
+      "requests.cpu"    = "4"
+      "requests.memory" = "8Gi"
+      "limits.memory"   = "16Gi"
+    }
+  }
+}
+
+# Per-container defaults and ceiling applied inside the namespace.
+resource "kubernetes_limit_range" "nextcloud" {
+  metadata {
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = "nextcloud-limits"
+  }
+
+  spec {
+    limit {
+      type = "Container"
+
+      # Upper bound for any single container.
+      max = {
+        memory = "8Gi"
+      }
+      # Applied when a container declares no limit.
+      default = {
+        memory = "256Mi"
+      }
+      # Applied when a container declares no request.
+      default_request = {
+        memory = "64Mi"
+        cpu    = "25m"
+      }
+    }
+  }
+}
+
+# Nextcloud chart release. The image tag comes from local.nextcloud_image_tag
+# (live tag with a floor) rather than a fixed pin.
+resource "helm_release" "nextcloud" {
+  name      = "nextcloud"
+  namespace = kubernetes_namespace.nextcloud.metadata[0].name
+
+  chart      = "nextcloud"
+  repository = "https://nextcloud.github.io/helm/"
+  version    = "8.8.1"
+
+  atomic  = true
+  timeout = 6000
+
+  values = [
+    templatefile("${path.module}/chart_values.yaml", {
+      image_tag       = local.nextcloud_image_tag
+      mysql_host      = var.mysql_host
+      tls_secret_name = var.tls_secret_name
+    })
+  ]
+
+  depends_on = [kubernetes_manifest.db_external_secret]
+}
+
+# ConfigMap carrying the Apache prefork MPM settings (mpm_prefork.conf).
+resource "kubernetes_config_map" "apache_tuning" {
+  metadata {
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = "nextcloud-apache-tuning"
+  }
+
+  data = {
+    "mpm_prefork.conf" = <<-EOF
+      # Tuned for Nextcloud on MySQL
+      # Capped MaxRequestWorkers to prevent runaway Apache consuming all node CPU
+      <IfModule mpm_prefork_module>
+        StartServers            5
+        MinSpareServers         3
+        MaxSpareServers         10
+        MaxRequestWorkers       30
+        MaxConnectionsPerChild  500
+      </IfModule>
+    EOF
+  }
+}
+
+# resource "kubernetes_config_map" "config" {
+#   metadata {
+#     name      = "config"
+#    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+
+#     annotations = {
+#       "reloader.stakater.com/match" = "true"
+#     }
+#   }
+
+#   data = {
+#     "conf.yml" = file("${path.module}/conf.yml")
+#   }
+# }
+
+# Encrypted RWO data volume. Starts at 20Gi; the resize.topolvm.io annotations
+# let the autoresizer grow it up to the 100Gi storage_limit.
+resource "kubernetes_persistent_volume_claim" "nextcloud_data_encrypted" {
+  wait_until_bound = false
+
+  metadata {
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = "nextcloud-data-encrypted"
+    annotations = {
+      "resize.topolvm.io/storage_limit" = "100Gi"
+      "resize.topolvm.io/threshold"     = "10%"
+      "resize.topolvm.io/increase"      = "20%"
+    }
+  }
+
+  spec {
+    storage_class_name = "proxmox-lvm-encrypted"
+    access_modes       = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "20Gi"
+      }
+    }
+  }
+
+  lifecycle {
+    # The autoresizer raises requests.storage towards storage_limit, and a PVC
+    # can never shrink. Without this ignore, every TF apply would try to
+    # revert to the spec value, K8s would reject the shrink, and the PVC would
+    # end up in Terminating-but-in-use limbo.
+    ignore_changes = [spec[0].resources[0].requests]
+  }
+}
+
+# NFS volume for app-level backups (claim consumed by the backup CronJob).
+# NOTE(review): the server IP is hard-coded although var.nfs_server is
+# declared — confirm whether the variable should be used here.
+module "nfs_nextcloud_backup_host" {
+  source = "../../modules/kubernetes/nfs_volume"
+
+  namespace  = kubernetes_namespace.nextcloud.metadata[0].name
+  name       = "nextcloud-backup-host"
+  nfs_path   = "/srv/nfs/nextcloud-backup"
+  nfs_server = "192.168.1.127"
+}
+
+# NFS volume exposing the /srv/nfs export root.
+# NOTE(review): the server IP is hard-coded although var.nfs_server is
+# declared — confirm whether the variable should be used here.
+module "nfs_pve_root_host" {
+  source = "../../modules/kubernetes/nfs_volume"
+
+  namespace  = kubernetes_namespace.nextcloud.metadata[0].name
+  name       = "nextcloud-pve-nfs-root"
+  storage    = "3000Gi"
+  nfs_path   = "/srv/nfs"
+  nfs_server = "192.168.1.127"
+}
+
+# NFS volume exposing the /srv/nfs-ssd export root.
+# NOTE(review): the server IP is hard-coded although var.nfs_server is
+# declared — confirm whether the variable should be used here.
+module "nfs_pve_ssd_root_host" {
+  source = "../../modules/kubernetes/nfs_volume"
+
+  namespace  = kubernetes_namespace.nextcloud.metadata[0].name
+  name       = "nextcloud-pve-nfs-ssd-root"
+  storage    = "100Gi"
+  nfs_path   = "/srv/nfs-ssd"
+  nfs_server = "192.168.1.127"
+}
+
+module "ingress" {
+  source = "../../modules/kubernetes/ingress_factory"
+
+  name            = "nextcloud"
+  namespace       = kubernetes_namespace.nextcloud.metadata[0].name
+  port            = 8080
+  tls_secret_name = var.tls_secret_name
+  dns_type        = "proxied"
+
+  # auth = "app": native WebDAV / CalDAV / CardDAV clients (Nextcloud
+  # desktop+mobile apps, calendar sync) use HTTP basic-auth + app passwords,
+  # not browser sessions; Nextcloud enforces strong app-layer authentication
+  # of its own.
+  auth = "app"
+
+  # Homepage dashboard discovery + widget settings. Widget credentials come
+  # from the decoded homepage_credentials local.
+  extra_annotations = {
+    "gethomepage.dev/enabled"         = "true"
+    "gethomepage.dev/group"           = "Productivity"
+    "gethomepage.dev/name"            = "Nextcloud"
+    "gethomepage.dev/description"     = "Cloud productivity suite"
+    "gethomepage.dev/icon"            = "nextcloud.png"
+    "gethomepage.dev/pod-selector"    = ""
+    "gethomepage.dev/widget.type"     = "nextcloud"
+    "gethomepage.dev/widget.url"      = "https://nextcloud.viktorbarzin.me"
+    "gethomepage.dev/widget.username" = local.homepage_credentials["nextcloud"]["username"]
+    "gethomepage.dev/widget.password" = local.homepage_credentials["nextcloud"]["password"]
+  }
+}
+
+
+# Hook script: sync DB password from env var into config.php on every pod start.
+# Closes the Vault rotation gap: Vault rotates MySQL password → ESO syncs to K8s Secret →
+# Reloader restarts pod → this hook patches config.php with the current MYSQL_PASSWORD.
+#
+# Two files are shipped:
+#   sync-db-password.sh — compares config.php's dbpassword with
+#                         $MYSQL_PASSWORD and calls the PHP patcher on mismatch.
+#   patch-db-pw.php     — rewrites the 'dbpassword' entry in place.
+resource "kubernetes_config_map" "db_password_sync_hook" {
+  metadata {
+    name      = "nextcloud-db-password-sync"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  data = {
+    "sync-db-password.sh" = <<-EOF
+      #!/bin/bash
+      set -e
+      CONFIG="/var/www/html/config/config.php"
+      if [ -z "$MYSQL_PASSWORD" ]; then
+        echo "MYSQL_PASSWORD not set, skipping config.php sync"
+        exit 0
+      fi
+      if [ ! -f "$CONFIG" ]; then
+        echo "config.php not found, skipping (first install)"
+        exit 0
+      fi
+      CURRENT_PW=$(php -r "include '$CONFIG'; echo \$CONFIG['dbpassword'] ?? '';")
+      if [ "$CURRENT_PW" = "$MYSQL_PASSWORD" ]; then
+        echo "DB password in config.php already matches MYSQL_PASSWORD"
+        exit 0
+      fi
+      echo "Updating DB password in config.php to match MYSQL_PASSWORD..."
+      php /docker-entrypoint-hooks.d/before-starting/patch-db-pw.php "$CONFIG" "$MYSQL_PASSWORD"
+      echo "DB password updated successfully"
+    EOF
+
+    "patch-db-pw.php" = <<-EOF
+      <?php
+      // Usage: php patch-db-pw.php <path to config.php> <new password>
+      $file = $argv[1];
+      $newPw = $argv[2];
+      $content = file_get_contents($file);
+      if ($content === false) {
+        // Never write back after a failed read: that would truncate config.php.
+        fwrite(STDERR, "patch-db-pw: cannot read $file\n");
+        exit(1);
+      }
+      // var_export() emits a correctly escaped single-quoted PHP literal.
+      // (Escaping by hand with str_replace(["'", "\\"], ...) processes the
+      // pairs in order and re-escapes the backslash it just inserted, which
+      // corrupts any password containing a single quote.)
+      $literal = var_export($newPw, true);
+      // Match the current value, including backslash-escaped characters.
+      $pattern = "/'dbpassword'\\s*=>\\s*'(?:[^'\\\\]|\\\\.)*'/s";
+      // A callback keeps "$1"-style or backslash sequences inside the password
+      // from being interpreted as backreferences by the replacement engine.
+      $patched = preg_replace_callback($pattern, function ($m) use ($literal) {
+        return "'dbpassword' => " . $literal;
+      }, $content, -1, $count);
+      if ($patched === null || $count === 0) {
+        // Best effort: leave the file untouched when nothing was replaced.
+        fwrite(STDERR, "patch-db-pw: no 'dbpassword' entry replaced in $file\n");
+        exit(0);
+      }
+      file_put_contents($file, $patched);
+    EOF
+  }
+}
+
+# ConfigMap with the app-level backup script (run by the nextcloud-backup
+# CronJob) and a manual restore script.
+resource "kubernetes_config_map" "backup-script" {
+  metadata {
+    name      = "nextcloud-backup-script"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  data = {
+    "backup.sh" = <<-EOF
+      #!/bin/bash
+      set -e
+
+      BACKUP_DIR="/backup"
+      DATA_DIR="/nextcloud-data"
+      DATE=$(date +%Y%m%d_%H%M%S)
+      BACKUP_PATH="$BACKUP_DIR/$DATE"
+
+      echo "Starting Nextcloud backup at $(date)"
+
+      # Note: Maintenance mode is skipped because occ is not available in the NFS mount.
+      # For a proper backup with maintenance mode, exec into the nextcloud pod:
+      #   kubectl exec -n nextcloud deployment/nextcloud -- php occ maintenance:mode --on
+
+      # Create backup directory
+      mkdir -p "$BACKUP_PATH"
+
+      # Backup config/data/custom_apps. Exclusions (2026-06-01 space fix):
+      #  - nextcloud.log* — rotated at source via log_rotate_size; previously
+      #    grew to 10GB+ and bloated every dated copy (backups hit 20G each).
+      #  - preview cache — regenerable thumbnails, no need to back up.
+      # Backs up config/, data/, custom_apps/ (the irreplaceable bits). Skips:
+      #  - html/ — the Nextcloud app code, reproducible from the pinned image
+      #    (real config is at config/config.php; html/config/config.php is empty).
+      #  - nextcloud.log* — capped at source via log_rotate_size; was 10GB+.
+      #  - preview cache — regenerable thumbnails.
+      echo "Backing up Nextcloud installation..."
+      rsync -a \
+        --exclude='/html/' \
+        --exclude='nextcloud.log' \
+        --exclude='nextcloud.log.*' \
+        --exclude='data/appdata_*/preview/' \
+        "$DATA_DIR/" "$BACKUP_PATH/"
+
+      # Keep only the latest backup. The version history lives in daily-backup's
+      # pvc-data (4 weekly snapshot-consistent copies of this same encrypted PVC),
+      # so this browsable app-level copy only needs the most recent. Keeping the
+      # whole installation (incl. logs) x7 here was the bulk of the 87G that
+      # filled the offsite Synology.
+      #
+      # Sort by NAME, not mtime: dirs are YYYYMMDD_HHMMSS so lexical order is
+      # chronological. `rsync -a` stamps the backup dir with the SOURCE dir's
+      # mtime, which made the old `ls -dt | tail` delete the freshest backup and
+      # keep a stale one — keep the lexically-last (newest) instead.
+      #
+      # Only directories named YYYYMMDD_HHMMSS are considered. With a bare `*/`
+      # any other directory that sorts after the digits (e.g. lost+found/)
+      # would be "the newest" and the real latest backup would be deleted.
+      echo "Cleaning old backups (keep latest)..."
+      cd "$BACKUP_DIR"
+      ls -d [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_[0-9][0-9][0-9][0-9][0-9][0-9]/ 2>/dev/null | sort | head -n -1 | xargs -r rm -rf
+
+      echo "Backup completed at $(date)"
+      echo "Backup stored at: $BACKUP_PATH"
+    EOF
+
+    "restore.sh" = <<-EOF
+      #!/bin/bash
+      # Restore script - run manually when needed
+      # Usage: ./restore.sh <backup_date>
+      # Example: ./restore.sh 20250117_030000
+      #
+      # Before restoring, enable maintenance mode:
+      #   kubectl exec -n nextcloud deployment/nextcloud -- php occ maintenance:mode --on
+      # After restoring, disable it:
+      #   kubectl exec -n nextcloud deployment/nextcloud -- php occ maintenance:mode --off
+
+      set -e
+
+      if [ -z "$1" ]; then
+        echo "Usage: $0 <backup_date>"
+        echo "Available backups:"
+        ls -1 /backup/
+        exit 1
+      fi
+
+      BACKUP_PATH="/backup/$1"
+      DATA_DIR="/nextcloud-data"
+
+      if [ ! -d "$BACKUP_PATH" ]; then
+        echo "Backup not found: $BACKUP_PATH"
+        exit 1
+      fi
+
+      echo "Restoring from $BACKUP_PATH"
+
+      # Restore everything
+      echo "Restoring Nextcloud installation..."
+      rsync -a "$BACKUP_PATH/" "$DATA_DIR/"
+
+      echo "Restore completed!"
+      echo "Remember to run: kubectl exec -n nextcloud deployment/nextcloud -- php occ maintenance:mode --off"
+    EOF
+  }
+}
+
+# Watchdog: runs every 5 minutes with two jobs:
+#  1. Apache runaway recovery — if >40 workers (normal 5-15), rollout-restart
+#     to recover node CPU.
+#  2. F1 Keel self-heal — if occ reports needsDbUpgrade=true (an interrupted
+#     `occ upgrade` after a Keel image bump left the app in maintenance mode),
+#     re-run `occ upgrade` and clear maintenance mode.
+# Identity the watchdog CronJob pods run as.
+resource "kubernetes_service_account" "nextcloud_watchdog" {
+  metadata {
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = "nextcloud-watchdog"
+  }
+}
+
+# Namespace-scoped permissions for the watchdog: read/patch deployments
+# (rollout restart), list/get pods, and exec into pods (pgrep / occ).
+resource "kubernetes_role" "nextcloud_watchdog" {
+  metadata {
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = "nextcloud-watchdog"
+  }
+
+  rule {
+    verbs      = ["get", "patch"]
+    resources  = ["deployments"]
+    api_groups = ["apps"]
+  }
+
+  rule {
+    verbs      = ["list", "get"]
+    resources  = ["pods"]
+    api_groups = [""]
+  }
+
+  rule {
+    verbs      = ["create"]
+    resources  = ["pods/exec"]
+    api_groups = [""]
+  }
+}
+
+# Grants the watchdog Role to the watchdog ServiceAccount.
+resource "kubernetes_role_binding" "nextcloud_watchdog" {
+  metadata {
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = "nextcloud-watchdog"
+  }
+
+  subject {
+    kind      = "ServiceAccount"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = kubernetes_service_account.nextcloud_watchdog.metadata[0].name
+  }
+
+  role_ref {
+    kind      = "Role"
+    api_group = "rbac.authorization.k8s.io"
+    name      = kubernetes_role.nextcloud_watchdog.metadata[0].name
+  }
+}
+
+# Watchdog CronJob (every 5 minutes): restarts the deployment on an Apache
+# worker runaway and re-runs an interrupted `occ upgrade`.
+# NOTE(review): mpm_prefork.conf in this file caps MaxRequestWorkers at 30,
+# below the 40-worker runaway threshold used here — confirm the threshold is
+# still reachable when that tuning is mounted.
+resource "kubernetes_cron_job_v1" "nextcloud_watchdog" {
+  metadata {
+    name      = "nextcloud-watchdog"
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+  }
+
+  spec {
+    schedule                      = "*/5 * * * *"
+    successful_jobs_history_limit = 1
+    failed_jobs_history_limit     = 3
+    concurrency_policy            = "Forbid"
+
+    job_template {
+      metadata {}
+      spec {
+        # 600s (was 120s) so the F1 self-heal `occ upgrade` isn't killed
+        # mid-migration. concurrency_policy=Forbid prevents overlap.
+        active_deadline_seconds = 600
+        template {
+          metadata {}
+          spec {
+            service_account_name = kubernetes_service_account.nextcloud_watchdog.metadata[0].name
+            restart_policy       = "Never"
+
+            container {
+              name  = "watchdog"
+              image = "bitnami/kubectl:latest"
+
+              command = ["/bin/bash", "-c", <<-EOF
+                set -e
+                # Find the nextcloud pod. `|| true` is required: with no pod
+                # present the jsonpath lookup makes kubectl exit non-zero, and
+                # under `set -e` that would abort the job before the skip
+                # branch below is reached.
+                POD=$(kubectl get pods -n nextcloud -l app.kubernetes.io/name=nextcloud -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
+                if [ -z "$POD" ]; then
+                  echo "No nextcloud pod found, skipping"
+                  exit 0
+                fi
+
+                # Count processes named apache2 in the nextcloud container.
+                # pgrep -c prints "0" AND exits 1 when nothing matches, so a
+                # `|| echo "0"` fallback would produce "0<newline>0"; swallow
+                # the status instead and normalise anything non-numeric to 0.
+                WORKERS=$(kubectl exec -n nextcloud "$POD" -c nextcloud -- pgrep -c apache2 2>/dev/null || true)
+                case "$WORKERS" in
+                  ''|*[!0-9]*) WORKERS=0 ;;
+                esac
+                echo "$(date): Apache worker count: $WORKERS"
+
+                # Normal operation: 5-15 workers. Runaway threshold: 40+
+                if [ "$WORKERS" -gt 40 ]; then
+                  echo "RUNAWAY DETECTED: $WORKERS Apache workers (threshold: 40)"
+                  echo "Restarting nextcloud deployment..."
+                  kubectl rollout restart deployment nextcloud -n nextcloud
+                  echo "Restart triggered at $(date)"
+                else
+                  echo "Apache workers within normal range ($WORKERS <= 40)"
+                fi
+
+                # F1 self-heal: a Keel image bump runs `occ upgrade` in the
+                # entrypoint, but if that's interrupted (e.g. a probe restart
+                # mid-upgrade) occ reports needsDbUpgrade=true and the app sits
+                # in maintenance mode (503). Re-run the upgrade and clear
+                # maintenance mode. Gated on needsDbUpgrade only, so a
+                # deliberate manual maintenance window is left untouched.
+                ST=$(kubectl exec -n nextcloud "$POD" -c nextcloud -- php occ status --output=json 2>/dev/null || true)
+                if echo "$ST" | grep -q '"needsDbUpgrade":true'; then
+                  echo "$(date): needsDbUpgrade=true → running occ upgrade"
+                  kubectl exec -n nextcloud "$POD" -c nextcloud -- php occ upgrade --no-interaction || true
+                  kubectl exec -n nextcloud "$POD" -c nextcloud -- php occ maintenance:mode --off || true
+                  echo "$(date): self-heal occ upgrade complete"
+                else
+                  echo "$(date): occ status healthy (no DB upgrade pending)"
+                fi
+              EOF
+              ]
+            }
+          }
+        }
+      }
+    }
+  }
+  lifecycle {
+    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
+    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
+  }
+}
+
+# Weekly app-level backup: installs rsync+bash in an alpine container and runs
+# /scripts/backup.sh against the data PVC, writing to the NFS backup volume.
+resource "kubernetes_cron_job_v1" "nextcloud-backup" {
+  metadata {
+    namespace = kubernetes_namespace.nextcloud.metadata[0].name
+    name      = "nextcloud-backup"
+  }
+
+  spec {
+    schedule                      = "0 3 * * 0" # Sunday at 3 AM
+    concurrency_policy            = "Forbid"
+    failed_jobs_history_limit     = 3
+    successful_jobs_history_limit = 3
+
+    job_template {
+      metadata {}
+      spec {
+        template {
+          metadata {}
+          spec {
+            restart_policy = "OnFailure"
+
+            # The backup pod mounts the same RWO PVC (proxmox-lvm-encrypted)
+            # as the main nextcloud pod, and that volume cannot attach to two
+            # nodes at once — so the pod MUST land on the node running
+            # nextcloud. Without this affinity it sits in ContainerCreating
+            # until cron retries.
+            affinity {
+              pod_affinity {
+                required_during_scheduling_ignored_during_execution {
+                  namespaces   = [kubernetes_namespace.nextcloud.metadata[0].name]
+                  topology_key = "kubernetes.io/hostname"
+                  label_selector {
+                    match_labels = {
+                      "app.kubernetes.io/instance" = "nextcloud"
+                      "app.kubernetes.io/name"     = "nextcloud"
+                    }
+                  }
+                }
+              }
+            }
+
+            container {
+              name  = "backup"
+              image = "alpine:latest"
+
+              command = ["/bin/sh", "-c", "apk add --no-cache rsync bash && /scripts/backup.sh"]
+
+              # Source data (the encrypted Nextcloud PVC).
+              volume_mount {
+                mount_path = "/nextcloud-data"
+                name       = "nextcloud-data"
+              }
+
+              # Destination (NFS backup volume).
+              volume_mount {
+                mount_path = "/backup"
+                name       = "backup"
+              }
+
+              # backup.sh / restore.sh from the backup-script ConfigMap.
+              volume_mount {
+                mount_path = "/scripts"
+                name       = "scripts"
+              }
+            }
+
+            volume {
+              name = "nextcloud-data"
+              persistent_volume_claim {
+                claim_name = kubernetes_persistent_volume_claim.nextcloud_data_encrypted.metadata[0].name
+              }
+            }
+
+            volume {
+              name = "backup"
+              persistent_volume_claim {
+                claim_name = module.nfs_nextcloud_backup_host.claim_name
+              }
+            }
+
+            volume {
+              name = "scripts"
+              config_map {
+                default_mode = "0755"
+                name         = kubernetes_config_map.backup-script.metadata[0].name
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  lifecycle {
+    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
+    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
+  }
+}
+
+# CI retrigger 2026-05-16T13:42:57+00:00 — bulk enrollment apply (pipeline #689 killed)
+# CI retrigger v2 2026-05-16T13:46:35+00:00
--- a/stacks/nextcloud/providers.tf
+++ b/stacks/nextcloud/providers.tf
@ -0,0 +1,53 @@
+# Generated by Terragrunt. Sig: nIlQXj57tbuaRZEa
+terraform {
+  required_providers {
+    authentik = {
+      source  = "goauthentik/authentik"
+      version = "~> 2024.10"
+    }
+    cloudflare = {
+      source  = "cloudflare/cloudflare"
+      version = "~> 4"
+    }
+    # kubectl (gavinbunney) — workaround for hashicorp/kubernetes
+    # `kubernetes_manifest` panics on Kyverno CRDs. See beads code-e2dp.
+    # Declared for all stacks but only used where opted-in.
+    kubectl = {
+      source  = "gavinbunney/kubectl"
+      version = "~> 1.14"
+    }
+    proxmox = {
+      source  = "telmate/proxmox"
+      version = "3.0.2-rc07"
+    }
+    vault = {
+      source  = "hashicorp/vault"
+      version = "~> 4.0"
+    }
+  }
+}
+
+# Kubeconfig path shared by the kubernetes, helm and kubectl providers.
+variable "kube_config_path" {
+  default = "~/.kube/config"
+  type    = string
+}
+
+# Kubernetes provider, authenticated through the kubeconfig file.
+provider "kubernetes" {
+  config_path = var.kube_config_path
+}
+
+# Helm provider, pointed at the same kubeconfig as the kubernetes provider.
+provider "helm" {
+  kubernetes = {
+    config_path = var.kube_config_path
+  }
+}
+
+# Vault provider; skip_child_token is enabled.
+provider "vault" {
+  skip_child_token = true
+  address          = "https://vault.viktorbarzin.me"
+}
+
+# kubectl provider, loading the same kubeconfig file.
+provider "kubectl" {
+  load_config_file = true
+  config_path      = var.kube_config_path
+}
--- a/stacks/nextcloud/secrets
+++ b/stacks/nextcloud/secrets
@ -0,0 +1 @@
+../../secrets
--- a/stacks/nextcloud/terragrunt.hcl
+++ b/stacks/nextcloud/terragrunt.hcl
@ -0,0 +1,13 @@
+# Inherit the root Terragrunt configuration from a parent folder.
+include "root" {
+  path = find_in_parent_folders()
+}
+
+# Dependencies on the platform and vault stacks; skip_outputs is set, so none
+# of their outputs are read here.
+dependency "platform" {
+  skip_outputs = true
+  config_path  = "../platform"
+}
+
+dependency "vault" {
+  skip_outputs = true
+  config_path  = "../vault"
+}