# infra/modules/kubernetes/ollama/values.yaml

ollama:
  gpu:
    # -- Toggle GPU integration
    enabled: true

    # -- GPU vendor: 'nvidia' or 'amd'
    type: 'nvidia'

    # -- Number of GPUs
    number: 1

  # -- Models to pull when the container starts
  models:
    pull:
      - llama3

# -- Persistent storage settings
persistentVolume:
  enabled: true
  # -- Name of an already-existing claim to use
  # NOTE(review): presumably this claim must be created outside this
  # values file — confirm it exists in the target namespace.
  existingClaim: 'ollama-pvc'

# -- Node label selector: schedule only onto nodes labelled gpu=true
nodeSelector:
  # Quoted on purpose: the value must be the string "true", not a boolean.
  gpu: 'true'

# -- Tolerate the taint nvidia.com/gpu=true with effect NoSchedule
tolerations:
  - key: 'nvidia.com/gpu'
    operator: 'Equal'
    # Quoted on purpose: the value must be the string "true", not a boolean.
    value: 'true'
    effect: 'NoSchedule'