gpu: schedule off NFD label, not k8s-node1 hostname

Remove every hardcoded reference to k8s-node1 that pinned GPU
scheduling to a specific host:

- GPU workload nodeSelectors: gpu=true -> nvidia.com/gpu.present=true
  (frigate, immich, whisper, piper, ytdlp, ebook2audiobook, audiblez,
  audiblez-web, nvidia-exporter, gpu-pod-exporter). The
  nvidia.com/gpu.present label is auto-applied by gpu-feature-discovery
  on any node with a detected NVIDIA GPU, so the selector follows the
  card.

- null_resource.gpu_node_config: rewrite to enumerate NFD-labeled
  nodes (feature.node.kubernetes.io/pci-10de.present=true) and taint
  each with nvidia.com/gpu=true:PreferNoSchedule. Drop the manual
  'kubectl label gpu=true' since NFD handles labeling.

- MySQL anti-affinity: kubernetes.io/hostname NotIn [k8s-node1] ->
  nvidia.com/gpu.present NotIn [true]. Same intent (keep MySQL off
  the GPU node) but portable when the card relocates.

Net effect: moving the GPU card between nodes no longer requires any
Terraform edit. Verified no-op for current scheduling — both old and
new labels resolve to k8s-node1 today.

Docs updated to match: AGENTS.md, compute.md, overview.md,
proxmox-inventory.md, k8s-portal agent-guidance string.
This commit is contained in:
Viktor Barzin 2026-04-22 13:43:07 +00:00
parent 134d6b9a82
commit e2146e6916
12 changed files with 52 additions and 36 deletions

View file

@ -63,18 +63,25 @@ resource "kubernetes_resource_quota" "nvidia_quota" {
}
}
# Apply GPU taint and label to ensure only GPU workloads run on GPU node
# Apply GPU taint dynamically based on NFD-discovered GPU nodes. The
# NFD label `feature.node.kubernetes.io/pci-10de.present=true` is
# auto-applied on any node with an NVIDIA PCI device (vendor 0x10de),
# so the taint follows the card if it moves between nodes. Workload
# nodeSelectors key off `nvidia.com/gpu.present=true` (applied by
# gpu-feature-discovery once the operator is up).
# Runs kubectl locally to taint every node selected by the label
# `feature.node.kubernetes.io/pci-10de.present=true` with
# `nvidia.com/gpu=true:PreferNoSchedule`.
#
# NOTE(review): this is a rendered diff hunk, so removed and added lines
# appear side by side. The two `k8s-node1` commands and the first
# `namespace` trigger look like the removed side (the commit message says
# the manual label/taint was dropped) — confirm against the real file.
resource "null_resource" "gpu_node_config" {
# NOTE(review): no `interpreter` is set, so the script runs under
# Terraform's default shell — presumably `/bin/sh -c` on Linux; confirm.
# `set -o pipefail` is not accepted by every /bin/sh (e.g. dash), which
# would abort the script before any node is tainted. Consider
# `interpreter = ["/bin/bash", "-c"]`.
# NOTE(review): a failing `kubectl get nodes` inside the `for ... in $(...)`
# word list likely does not trip `set -e`; the loop just iterates zero
# times. Likewise, if no node carries the label yet, nothing is tainted and
# the provisioner still succeeds — verify that this is acceptable.
provisioner "local-exec" {
command = <<-EOT
kubectl taint nodes k8s-node1 nvidia.com/gpu=true:PreferNoSchedule --overwrite
kubectl label nodes k8s-node1 gpu=true --overwrite
set -euo pipefail
for node in $(kubectl get nodes -l feature.node.kubernetes.io/pci-10de.present=true -o jsonpath='{.items[*].metadata.name}'); do
kubectl taint nodes "$node" nvidia.com/gpu=true:PreferNoSchedule --overwrite
done
EOT
}
# Re-run when the namespace name changes (proxy for cluster changes) or
# when `command_hash` is edited. `command_hash` is a hand-maintained
# literal, so it must be bumped manually whenever the command above changes.
triggers = {
namespace = kubernetes_namespace.nvidia.metadata[0].name
namespace = kubernetes_namespace.nvidia.metadata[0].name
command_hash = "dynamic-taint-v1"
}
}
@ -141,7 +148,7 @@ resource "kubernetes_deployment" "nvidia-exporter" {
}
spec {
node_selector = {
"gpu" : "true"
"nvidia.com/gpu.present" : "true"
}
toleration {
key = "nvidia.com/gpu"
@ -604,7 +611,7 @@ resource "kubernetes_daemonset" "gpu_pod_exporter" {
service_account_name = kubernetes_service_account.gpu_pod_exporter.metadata[0].name
node_selector = {
"gpu" : "true"
"nvidia.com/gpu.present" : "true"
}
toleration {