gpu: schedule off NFD label, not k8s-node1 hostname

Remove every hardcoded reference to k8s-node1 that pinned GPU scheduling to a specific host:

- GPU workload nodeSelectors: gpu=true -> nvidia.com/gpu.present=true (frigate, immich, whisper, piper, ytdlp, ebook2audiobook, audiblez, audiblez-web, nvidia-exporter, gpu-pod-exporter). The NFD label is auto-applied by gpu-feature-discovery on any node carrying an NVIDIA PCI device, so the selector follows the card.
- null_resource.gpu_node_config: rewrite to enumerate NFD-labeled nodes (feature.node.kubernetes.io/pci-10de.present=true) and taint each with nvidia.com/gpu=true:PreferNoSchedule. Drop the manual 'kubectl label gpu=true' since NFD handles labeling.
- MySQL anti-affinity: kubernetes.io/hostname NotIn [k8s-node1] -> nvidia.com/gpu.present NotIn [true]. Same intent (keep MySQL off the GPU node) but portable when the card relocates.

Net effect: moving the GPU card between nodes no longer requires any Terraform edit. Verified no-op for current scheduling — both old and new labels resolve to node1 today.

Docs updated to match: AGENTS.md, compute.md, overview.md, proxmox-inventory.md, k8s-portal agent-guidance string.
2026-04-22 13:43:07 +00:00 · 2026-04-22 13:43:07 +00:00 · e2146e6916
commit e2146e6916
parent 134d6b9a82
12 changed files with 52 additions and 36 deletions
--- a/stacks/dbaas/modules/dbaas/main.tf
+++ b/stacks/dbaas/modules/dbaas/main.tf
@ -157,9 +157,9 @@ resource "kubernetes_stateful_set_v1" "mysql_standalone" {
            required_during_scheduling_ignored_during_execution {
              node_selector_term {
                match_expressions {
-                  key      = "kubernetes.io/hostname"
+                  key      = "nvidia.com/gpu.present"
                  operator = "NotIn"
-                  values   = ["k8s-node1"]
+                  values   = ["true"]
                }
              }
            }
--- a/stacks/ebook2audiobook/main.tf
+++ b/stacks/ebook2audiobook/main.tf
@ -72,7 +72,7 @@ resource "kubernetes_deployment" "ebook2audiobook" {

      spec {
        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key      = "nvidia.com/gpu"
@ -290,7 +290,7 @@ resource "kubernetes_deployment" "audiblez" {
      }
      spec {
        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key      = "nvidia.com/gpu"
@ -356,7 +356,7 @@ resource "kubernetes_deployment" "audiblez-web" {
      }
      spec {
        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key      = "nvidia.com/gpu"
--- a/stacks/frigate/main.tf
+++ b/stacks/frigate/main.tf
@ -87,7 +87,7 @@ resource "kubernetes_deployment" "frigate" {
      }
      spec {
        node_selector = {
-          "gpu" : true
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key      = "nvidia.com/gpu"
--- a/stacks/immich/main.tf
+++ b/stacks/immich/main.tf
@ -559,7 +559,7 @@ resource "kubernetes_deployment" "immich-machine-learning" {
      spec {
        priority_class_name = "gpu-workload"
        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key      = "nvidia.com/gpu"
--- a/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts
+++ b/stacks/k8s-portal/modules/k8s-portal/files/src/routes/agent/+server.ts
@ -138,7 +138,7 @@ Kyverno auto-generates LimitRange + ResourceQuota per namespace based on tier la

 - **Proxmox**: 192.168.1.127 (Dell R730, 22c/44t, 142GB RAM)
 - **Nodes**: k8s-master (10.0.20.100), node1 (GPU, Tesla T4), node2-4
- **GPU workloads**: \`node_selector = { "gpu": "true" }\` + toleration \`nvidia.com/gpu\`
+- **GPU workloads**: \`node_selector = { "nvidia.com/gpu.present" : "true" }\` + toleration \`nvidia.com/gpu\` (label auto-applied by gpu-feature-discovery, no hostname pins)
 - **Pull-through cache**: 10.0.20.10 — use versioned image tags (cache serves stale :latest manifests)
 - **MySQL InnoDB Cluster**: 3 instances on iSCSI
 - **SMTP**: \`var.mail_host\` port 587 STARTTLS
--- a/stacks/nvidia/modules/nvidia/main.tf
+++ b/stacks/nvidia/modules/nvidia/main.tf
@ -63,18 +63,25 @@ resource "kubernetes_resource_quota" "nvidia_quota" {
  }
 }

-# Apply GPU taint and label to ensure only GPU workloads run on GPU node
+# Apply GPU taint dynamically based on NFD-discovered GPU nodes. The
+# NFD label `feature.node.kubernetes.io/pci-10de.present=true` is
+# auto-applied on any node with an NVIDIA PCI device (vendor 0x10de),
+# so the taint follows the card if it moves between nodes. Workload
+# nodeSelectors key off `nvidia.com/gpu.present=true` (applied by
+# gpu-feature-discovery once the operator is up).
 resource "null_resource" "gpu_node_config" {
   provisioner "local-exec" {
     command = <<-EOT
-      kubectl taint nodes k8s-node1 nvidia.com/gpu=true:PreferNoSchedule --overwrite
-      kubectl label nodes k8s-node1 gpu=true --overwrite
+      set -eu # POSIX flags only: local-exec runs this via /bin/sh, which may reject 'pipefail' (no pipelines here anyway)
+      for node in $(kubectl get nodes -l feature.node.kubernetes.io/pci-10de.present=true -o jsonpath='{.items[*].metadata.name}'); do
+        kubectl taint nodes "$node" nvidia.com/gpu=true:PreferNoSchedule --overwrite
+      done
     EOT
   }
 
-  # Re-run if namespace changes (proxy for cluster changes)
   triggers = {
-    namespace = kubernetes_namespace.nvidia.metadata[0].name
+    namespace    = kubernetes_namespace.nvidia.metadata[0].name
+    command_hash = "dynamic-taint-v1"
   }
  }

@ -141,7 +148,7 @@ resource "kubernetes_deployment" "nvidia-exporter" {
      }
      spec {
        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key      = "nvidia.com/gpu"
@ -604,7 +611,7 @@ resource "kubernetes_daemonset" "gpu_pod_exporter" {
        service_account_name = kubernetes_service_account.gpu_pod_exporter.metadata[0].name

        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }

        toleration {
--- a/stacks/whisper/main.tf
+++ b/stacks/whisper/main.tf
@ -73,7 +73,7 @@ resource "kubernetes_deployment" "whisper" {
      }
      spec {
        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key      = "nvidia.com/gpu"
@ -195,7 +195,7 @@ resource "kubernetes_deployment" "piper" {
      }
      spec {
        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key      = "nvidia.com/gpu"
--- a/stacks/ytdlp/main.tf
+++ b/stacks/ytdlp/main.tf
@ -227,7 +227,7 @@ resource "kubernetes_deployment" "yt_highlights" {
      }
      spec {
        node_selector = {
-          "gpu" : "true"
+          "nvidia.com/gpu.present" : "true"
        }
        toleration {
          key    = "nvidia.com/gpu"