Add nvidia.com/gpu toleration to all GPU workloads (frigate, ollama) to support NoSchedule taint on GPU nodes. Update nvidia operator helm values with daemonset tolerations. Enhance GPU pod memory exporter with Kubernetes API integration to resolve container IDs to pod names/namespaces, adding RBAC resources for API access.
28 lines
463 B
YAML
28 lines
463 B
YAML
# Helm values for the Ollama deployment.
# NOTE(review): nesting was reconstructed from a flattened copy and follows the
# usual Ollama chart layout (ollama.gpu / ollama.models under `ollama`, pod
# scheduling keys at top level) — confirm against the chart's values schema.
ollama:
  gpu:
    # -- Enable GPU integration
    enabled: true

    # -- GPU type: 'nvidia' or 'amd'
    type: "nvidia"

    # -- Number of GPUs to request
    number: 1

  # -- List of models to pull at container startup
  models:
    pull:
      - llama3

# Reuse a pre-existing claim rather than having the chart create one.
persistentVolume:
  enabled: true
  existingClaim: "ollama-pvc"

# Only schedule onto nodes labeled gpu=true. The value stays quoted because
# Kubernetes label values must be strings, not booleans.
nodeSelector:
  gpu: "true"

# Tolerate the nvidia.com/gpu=true:NoSchedule taint so the pod may be placed
# on nodes carrying it. The value is quoted for the same string-typing reason.
tolerations:
  - key: "nvidia.com/gpu"
    operator: "Equal"
    value: "true"
    effect: "NoSchedule"