downgrade NVIDIA driver to work with CUDA 12.8 [ci skip]

This commit is contained in:
Viktor Barzin 2025-12-14 19:09:07 +00:00
parent 397fa0cba7
commit 308ce0019d
2 changed files with 13 additions and 46 deletions

View file

@ -49,7 +49,7 @@ resource "helm_release" "nvidia-gpu-operator" {
# version = "0.9.3"
timeout = 6000
# values = [templatefile("${path.module}/values.yaml", {})]
values = [templatefile("${path.module}/values.yaml", {})]
}
resource "kubernetes_deployment" "nvidia-exporter" {

View file

@ -1,48 +1,15 @@
# values-cuda12.8.yaml
driver:
enabled: true
repository: nvcr.io/nvidia/driver
# repository: nvcr.io/nvidia/driver
# choose a driver version compatible with your GPU + CUDA 12.x (example)
version: "535.113.01"
toolkit:
enabled: true
image:
repository: nvcr.io/nvidia/cuda
# use a CUDA 12.8 toolkit image; adjust OS tag as needed (e.g. ubuntu22.04, ubuntu20.04)
tag: "12.8.1-devel-ubuntu22.04"
# (Optional) If you have containerd, you might need environment for containerd config
# env:
# - name: CONTAINERD_CONFIG
# value: /etc/containerd/config.toml
# - name: CONTAINERD_SOCKET
# value: /run/containerd/containerd.sock
# - name: RUNTIME_CONFIG_SOURCE
# value: "command, file"
devicePlugin:
enabled: true
image:
repository: nvcr.io/nvidia/k8s-device-plugin
# pick a device-plugin build compatible with CUDA 12.8
tag: "1.14.0-12.8-ubuntu22.04"
dcgmExporter:
enabled: true
image:
repository: nvcr.io/nvidia/k8s/dcgm-exporter
tag: "2.12.13-12.8-ubuntu22.04"
# Optional: if you use MIG or want MIG Manager
# migManager:
# enabled: false
# If you want GPUFeatureDiscovery (GFD) — optional
# gfd:
# enabled: false
# Other optional overrides (keep defaults or adjust as needed)
# ccManager:
# enabled: false
# cdi:
# enabled: true
# NVIDIA GPU driver - https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/platform-support.html#known-issue
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/
# CUDA 13.x requires driver >= 580
# CUDA 12.x requires driver >= 525, < 580
# CUDA 11.x requires driver >= 450, < 525
#
# Delete the cluster policy before each change
# version: "575.57.08" # CUDA 12.9
version: "570.195.03" # CUDA 12.8
upgradePolicy:
autoUpgrade: false