[ci skip] fix cluster health: GPU tolerations, actualbudget nfs_server, AuthentikDown alert
- Add missing nvidia.com/gpu toleration to ollama and yt-highlights deployments
- Add node_selector gpu=true to ollama deployment
- Pass nfs_server variable through to actualbudget factory modules
- Fix AuthentikDown alert to match actual deployment name (goauthentik-server)
This commit is contained in:
parent
4fab38da1f
commit
c35bef2fd8
4 changed files with 18 additions and 1 deletion
|
|
@@ -1,5 +1,6 @@
|
|||
variable "tls_secret_name" { type = string }
|
||||
variable "actualbudget_credentials" { type = map(any) }
|
||||
variable "nfs_server" { type = string }
|
||||
|
||||
|
||||
# To create a new deployment:
|
||||
|
|
@@ -32,6 +33,7 @@ module "viktor" {
|
|||
name = "viktor"
|
||||
tag = "edge"
|
||||
tls_secret_name = var.tls_secret_name
|
||||
nfs_server = var.nfs_server
|
||||
depends_on = [kubernetes_namespace.actualbudget]
|
||||
tier = local.tiers.edge
|
||||
budget_encryption_password = lookup(var.actualbudget_credentials["viktor"], "password", null)
|
||||
|
|
@@ -44,6 +46,7 @@ module "anca" {
|
|||
name = "anca"
|
||||
tag = "edge"
|
||||
tls_secret_name = var.tls_secret_name
|
||||
nfs_server = var.nfs_server
|
||||
depends_on = [kubernetes_namespace.actualbudget]
|
||||
tier = local.tiers.edge
|
||||
budget_encryption_password = lookup(var.actualbudget_credentials["anca"], "password", null)
|
||||
|
|
@@ -56,6 +59,7 @@ module "emo" {
|
|||
name = "emo"
|
||||
tag = "edge"
|
||||
tls_secret_name = var.tls_secret_name
|
||||
nfs_server = var.nfs_server
|
||||
depends_on = [kubernetes_namespace.actualbudget]
|
||||
tier = local.tiers.edge
|
||||
budget_encryption_password = lookup(var.actualbudget_credentials["emo"], "password", null)
|
||||
|
|
|
|||
|
|
@@ -89,6 +89,14 @@ resource "kubernetes_deployment" "ollama" {
|
|||
}
|
||||
}
|
||||
spec {
|
||||
node_selector = {
|
||||
"gpu" = "true"
|
||||
}
|
||||
toleration {
|
||||
key = "nvidia.com/gpu"
|
||||
value = "true"
|
||||
effect = "NoSchedule"
|
||||
}
|
||||
container {
|
||||
image = "ollama/ollama:latest"
|
||||
name = "ollama"
|
||||
|
|
|
|||
|
|
@@ -427,7 +427,7 @@ serverFiles:
|
|||
annotations:
|
||||
summary: "Headscale VPN has no available replicas"
|
||||
- alert: AuthentikDown
|
||||
- expr: (kube_deployment_status_replicas_available{namespace="authentik", deployment="authentik-server"} or on() vector(0)) < 1
|
||||
+ expr: (kube_deployment_status_replicas_available{namespace="authentik", deployment="goauthentik-server"} or on() vector(0)) < 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
|
|||
|
|
@@ -189,6 +189,11 @@ resource "kubernetes_deployment" "yt_highlights" {
|
|||
node_selector = {
|
||||
"gpu" : "true"
|
||||
}
|
||||
toleration {
|
||||
key = "nvidia.com/gpu"
|
||||
value = "true"
|
||||
effect = "NoSchedule"
|
||||
}
|
||||
container {
|
||||
name = "yt-highlights"
|
||||
image = "viktorbarzin/yt-highlights:v20-20260127"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue