diff --git a/stacks/broker-sync/main.tf b/stacks/broker-sync/main.tf index 76d822d8..67f8089e 100644 --- a/stacks/broker-sync/main.tf +++ b/stacks/broker-sync/main.tf @@ -167,6 +167,12 @@ resource "kubernetes_cron_job_v1" "trading212" { } spec { restart_policy = "OnFailure" + # Pin every job that mounts the shared RWO data volume to one node: + # cross-node scheduling forced a nightly detach/attach cycle whose + # QMP hotplug intermittently ghost-attaches on disk-heavy VMs and + # wedges all broker-sync jobs in ContainerCreating (2026-07-01/02). + # One node = volume attaches once and stays put. + node_selector = { "kubernetes.io/hostname" = "k8s-node4" } # See imap cron — without fsGroup=10001 the broker user (uid=10001 # gid=999) can't write the sqlite3 journal next to /data/sync.db # and the dedup.record() call after a successful WF import crashes @@ -277,6 +283,12 @@ resource "kubernetes_cron_job_v1" "ibkr" { } spec { restart_policy = "OnFailure" + # Pin every job that mounts the shared RWO data volume to one node: + # cross-node scheduling forced a nightly detach/attach cycle whose + # QMP hotplug intermittently ghost-attaches on disk-heavy VMs and + # wedges all broker-sync jobs in ContainerCreating (2026-07-01/02). + # One node = volume attaches once and stays put. + node_selector = { "kubernetes.io/hostname" = "k8s-node4" } security_context { fs_group = 10001 } @@ -411,6 +423,12 @@ resource "kubernetes_cron_job_v1" "imap" { } spec { restart_policy = "OnFailure" + # Pin every job that mounts the shared RWO data volume to one node: + # cross-node scheduling forced a nightly detach/attach cycle whose + # QMP hotplug intermittently ghost-attaches on disk-heavy VMs and + # wedges all broker-sync jobs in ContainerCreating (2026-07-01/02). + # One node = volume attaches once and stays put. + node_selector = { "kubernetes.io/hostname" = "k8s-node4" } # The broker image's user is uid=10001 gid=999, but the shared # data PVC's /data root was created with gid=10001 (legacy from # an earlier image build). Without fsGroup the pod can't write @@ -563,6 +581,12 @@ resource "kubernetes_cron_job_v1" "csv_drop" { } spec { restart_policy = "OnFailure" + # Pin every job that mounts the shared RWO data volume to one node: + # cross-node scheduling forced a nightly detach/attach cycle whose + # QMP hotplug intermittently ghost-attaches on disk-heavy VMs and + # wedges all broker-sync jobs in ContainerCreating (2026-07-01/02). + # One node = volume attaches once and stays put. + node_selector = { "kubernetes.io/hostname" = "k8s-node4" } container { name = "broker-sync" image = local.broker_sync_image @@ -655,6 +679,12 @@ resource "kubernetes_cron_job_v1" "fx_reconcile" { } spec { restart_policy = "OnFailure" + # Pin every job that mounts the shared RWO data volume to one node: + # cross-node scheduling forced a nightly detach/attach cycle whose + # QMP hotplug intermittently ghost-attaches on disk-heavy VMs and + # wedges all broker-sync jobs in ContainerCreating (2026-07-01/02). + # One node = volume attaches once and stays put. + node_selector = { "kubernetes.io/hostname" = "k8s-node4" } container { name = "broker-sync" image = local.broker_sync_image @@ -747,6 +777,12 @@ resource "kubernetes_cron_job_v1" "backup" { } spec { restart_policy = "OnFailure" + # Pin every job that mounts the shared RWO data volume to one node: + # cross-node scheduling forced a nightly detach/attach cycle whose + # QMP hotplug intermittently ghost-attaches on disk-heavy VMs and + # wedges all broker-sync jobs in ContainerCreating (2026-07-01/02). + # One node = volume attaches once and stays put. + node_selector = { "kubernetes.io/hostname" = "k8s-node4" } container { name = "backup" image = "alpine:3.20" @@ -850,6 +886,12 @@ resource "kubernetes_cron_job_v1" "fidelity" { } spec { restart_policy = "OnFailure" + # Pin every job that mounts the shared RWO data volume to one node: + # cross-node scheduling forced a nightly detach/attach cycle whose + # QMP hotplug intermittently ghost-attaches on disk-heavy VMs and + # wedges all broker-sync jobs in ContainerCreating (2026-07-01/02). + # One node = volume attaches once and stays put. + node_selector = { "kubernetes.io/hostname" = "k8s-node4" } # Materialise the JSON storage_state from the projected Secret # onto the PVC where Playwright expects to read it. Init container # runs as root; the main broker-sync container runs as uid 10001,