From 3489621a458385ed5f4a031cacf3a839b5d08377 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Sun, 26 Apr 2026 11:03:20 +0000 Subject: [PATCH] nextcloud(backup): pin backup pod to nextcloud's node via podAffinity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The weekly backup mounts the same RWO PVC (proxmox-lvm-encrypted) as the main nextcloud deployment. An RWO volume can only be attached to a single node at a time, so the backup pod can never mount the volume if it lands on a different node; it was stuck in ContainerCreating for 6+ hours when cron fired today. Add pod_affinity (required, hostname topology) so the backup co-locates with the nextcloud app pod. Discovered via the cluster-health probe; a manual verification run was scheduled on k8s-node3 next to nextcloud's pod and completed the rsync in seconds. --- stacks/nextcloud/main.tf | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/stacks/nextcloud/main.tf b/stacks/nextcloud/main.tf index 14a5122d..d737fa5c 100644 --- a/stacks/nextcloud/main.tf +++ b/stacks/nextcloud/main.tf @@ -493,6 +493,25 @@ resource "kubernetes_cron_job_v1" "nextcloud-backup" { spec { restart_policy = "OnFailure" + # Backup mounts the same RWO PVC (proxmox-lvm-encrypted) as the + # main nextcloud pod, so it MUST schedule on the same node — the + # volume cannot attach to two nodes simultaneously. Without this + # the backup pod is stuck in ContainerCreating until cron retries. + affinity { + pod_affinity { + required_during_scheduling_ignored_during_execution { + label_selector { + match_labels = { + "app.kubernetes.io/name" = "nextcloud" + "app.kubernetes.io/instance" = "nextcloud" + } + } + topology_key = "kubernetes.io/hostname" + namespaces = [kubernetes_namespace.nextcloud.metadata[0].name] + } + } + } + container { name = "backup" image = "alpine:latest"