diff --git a/modules/kubernetes/monitoring/dashboards/nvidia.json b/modules/kubernetes/monitoring/dashboards/nvidia.json
index ee9a21ac..ed5bd228 100644
--- a/modules/kubernetes/monitoring/dashboards/nvidia.json
+++ b/modules/kubernetes/monitoring/dashboards/nvidia.json
@@ -109,7 +109,7 @@
           "sort": "none"
         }
       },
-      "pluginVersion": "12.3.0",
+      "pluginVersion": "12.3.1",
       "targets": [
         {
           "datasource": {
@@ -183,7 +183,7 @@
         "showThresholdMarkers": true,
         "sizing": "auto"
       },
-      "pluginVersion": "12.3.0",
+      "pluginVersion": "12.3.1",
       "targets": [
         {
           "datasource": {
@@ -287,7 +287,7 @@
           "sort": "none"
         }
       },
-      "pluginVersion": "12.3.0",
+      "pluginVersion": "12.3.1",
       "targets": [
         {
           "datasource": {
@@ -361,7 +361,7 @@
         "showThresholdMarkers": true,
         "sizing": "auto"
       },
-      "pluginVersion": "12.3.0",
+      "pluginVersion": "12.3.1",
       "targets": [
         {
           "datasource": {
@@ -469,7 +469,7 @@
           "sort": "none"
         }
       },
-      "pluginVersion": "12.3.0",
+      "pluginVersion": "12.3.1",
       "targets": [
         {
           "datasource": {
@@ -572,7 +572,7 @@
           "sort": "none"
         }
       },
-      "pluginVersion": "12.3.0",
+      "pluginVersion": "12.3.1",
       "targets": [
         {
           "datasource": {
@@ -676,7 +676,7 @@
           "sort": "none"
         }
       },
-      "pluginVersion": "12.3.0",
+      "pluginVersion": "12.3.1",
       "targets": [
         {
           "datasource": {
@@ -695,6 +695,106 @@
       ],
       "title": "GPU SM Clocks",
       "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "showValues": false,
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": 0
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "bytes"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 24
+      },
+      "id": 19,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "hideZeros": false,
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "12.3.1",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "editorMode": "code",
+          "expr": "sum by (namespace) (gpu_pod_memory_used_bytes)",
+          "instant": false,
+          "legendFormat": "{{namespace}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "GPU Memory per Application",
+      "type": "timeseries"
     }
   ],
   "preload": false,
@@ -705,12 +805,12 @@
     "list": []
   },
   "time": {
-    "from": "now-3h",
+    "from": "now-12h",
     "to": "now"
   },
   "timepicker": {},
   "timezone": "",
   "title": "NVIDIA DCGM Exporter Dashboard",
   "uid": "Oxed_c6Wz",
-  "version": 8
+  "version": 9
 }
diff --git a/modules/kubernetes/ollama/main.tf b/modules/kubernetes/ollama/main.tf
index c08b6700..95543857 100644
--- a/modules/kubernetes/ollama/main.tf
+++ b/modules/kubernetes/ollama/main.tf
@@ -1,5 +1,11 @@
 variable "tls_secret_name" {}
 variable "tier" { type = string }
+# Map of username => plaintext password for the Ollama API basic auth; hashed with bcrypt below.
+variable "ollama_api_credentials" {
+  type      = map(string)
+  default   = {}
+  sensitive = true
+}
 
 resource "kubernetes_namespace" "ollama" {
   metadata {
@@ -158,17 +164,63 @@ module "ollama-ingress" {
   port = 11434
 }
 
-# Ollama API ingress for Claude Code access (restricted to LAN/VPN)
+# Ollama API ingress for external access (basicAuth protected)
+# htpasswd-style "user:hash" lines (bcrypt, cost 10), one per entry in var.ollama_api_credentials.
+locals {
+  ollama_api_htpasswd = join("\n", [for name, pass in var.ollama_api_credentials : "${name}:${bcrypt(pass, 10)}"])
+}
+
+resource "kubernetes_secret" "ollama_api_basic_auth" {
+  count = length(var.ollama_api_credentials) > 0 ? 1 : 0
+  metadata {
+    name      = "ollama-api-basic-auth-secret"
+    namespace = kubernetes_namespace.ollama.metadata[0].name
+  }
+
+  data = {
+    auth = local.ollama_api_htpasswd
+  }
+
+  type = "Opaque"
+  lifecycle {
+    # bcrypt() uses a fresh random salt on every evaluation, so the hash would
+    # differ on each plan. NOTE(review): ignoring data also means credential
+    # changes are not applied to an existing secret - confirm this is intended.
+    ignore_changes = [data]
+  }
+}
+
+resource "kubernetes_manifest" "ollama_api_basic_auth_middleware" {
+  count = length(var.ollama_api_credentials) > 0 ? 1 : 0
+  manifest = {
+    apiVersion = "traefik.io/v1alpha1"
+    kind       = "Middleware"
+    metadata = {
+      name      = "ollama-api-basic-auth"
+      namespace = kubernetes_namespace.ollama.metadata[0].name
+    }
+    spec = {
+      basicAuth = {
+        secret = kubernetes_secret.ollama_api_basic_auth[0].metadata[0].name
+      }
+    }
+  }
+}
+
 module "ollama-api-ingress" {
-  source                  = "../ingress_factory"
-  namespace               = kubernetes_namespace.ollama.metadata[0].name
-  name                    = "ollama-api"
-  service_name            = "ollama"
-  root_domain             = "viktorbarzin.lan"
-  tls_secret_name         = var.tls_secret_name
-  allow_local_access_only = true # Restricts to 10.0.0.0/8, 192.168.1.0/24
-  ssl_redirect            = false
-  port                    = 11434
+  source          = "../ingress_factory"
+  namespace       = kubernetes_namespace.ollama.metadata[0].name
+  name            = "ollama-api"
+  service_name    = "ollama"
+  root_domain     = "viktorbarzin.me"
+  tls_secret_name = var.tls_secret_name
+  ssl_redirect    = true
+  port            = 11434
+  extra_annotations = {
+    # Middleware refs are "<namespace>-<name>@kubernetescrd". NOTE(review): the basic-auth middleware
+    # is only created when var.ollama_api_credentials is non-empty - confirm behaviour for the default {}.
+    "traefik.ingress.kubernetes.io/router.middlewares" = "ollama-ollama-api-basic-auth@kubernetescrd,traefik-rate-limit@kubernetescrd,traefik-crowdsec@kubernetescrd"
+  }
 }
 
 # Web UI
diff --git a/modules/kubernetes/plotting-book/main.tf b/modules/kubernetes/plotting-book/main.tf
index c67e5fd0..8b1e91e8 100644
--- a/modules/kubernetes/plotting-book/main.tf
+++ b/modules/kubernetes/plotting-book/main.tf
@@ -25,6 +25,13 @@ resource "kubernetes_deployment" "plotting-book" {
       tier = var.tier
     }
   }
+  lifecycle {
+    # NOTE(review): Terraform will not reconcile the container image after
+    # creation; presumably it is rolled out-of-band - confirm.
+    ignore_changes = [
+      spec[0].template[0].spec[0].container[0].image,
+    ]
+  }
   spec {
     replicas = 1
     selector {
@@ -40,9 +47,10 @@ resource "kubernetes_deployment" "plotting-book" {
       }
       spec {
         container {
-          # image = "ancamilea/book-plotter:7"
-          image = "viktorbarzin/book-plotter:7"
-          name  = "plotting-book"
+          image = "ancamilea/book-plotter:latest"
+          # image = "viktorbarzin/book-plotter:7"
+          name              = "plotting-book"
+          image_pull_policy = "Always"
           port {
             container_port = 3001
           }
diff --git a/modules/kubernetes/reverse_proxy/factory/main.tf b/modules/kubernetes/reverse_proxy/factory/main.tf
index ba81b01b..371b2d47 100644
--- a/modules/kubernetes/reverse_proxy/factory/main.tf
+++ b/modules/kubernetes/reverse_proxy/factory/main.tf
@@ -111,7 +111,7 @@ resource "kubernetes_ingress_v1" "proxied-ingress" {
   }
 }
 
-# Rybbit analytics middleware (rewritebody plugin) - created per service when rybbit_site_id is set
+# Rybbit analytics middleware (rewrite-body plugin with content-type filtering) - created per service when rybbit_site_id is set
 resource "kubernetes_manifest" "rybbit_analytics" {
   count = var.rybbit_site_id != null ? 1 : 0
 
@@ -124,11 +124,14 @@ resource "kubernetes_manifest" "rybbit_analytics" {
     }
     spec = {
       plugin = {
-        rewritebody = {
+        rewrite-body = {
           rewrites = [{
             regex       = ""
             replacement = ""
           }]
+          monitoring = {
+            types = ["text/html"]
+          }
         }
       }
     }
diff --git a/modules/kubernetes/traefik/main.tf b/modules/kubernetes/traefik/main.tf
index 7bea0f36..cb7c51c5 100644
--- a/modules/kubernetes/traefik/main.tf
+++ b/modules/kubernetes/traefik/main.tf
@@ -135,9 +135,9 @@ resource "helm_release" "traefik" {
             moduleName = "github.com/maxlerebourg/crowdsec-bouncer-traefik-plugin"
             version    = "v1.4.2"
           }
-          rewritebody = {
-            moduleName = "github.com/traefik/plugin-rewritebody"
-            version    = "v0.3.1"
+          rewrite-body = {
+            moduleName = "github.com/packruler/rewrite-body"
+            version    = "v1.2.0"
           }
         }
       }