infra/stacks/monitoring/modules/monitoring/grafana.tf
Viktor Barzin 278ef5f19b monitoring(grafana): swap python3 for jq in folder-ACL local-exec
CI image (ci/Dockerfile) is alpine + jq, no python3. The
grafana_admin_only_folder_acl null_resource was parsing /api/folders
with a python3 one-liner, which crashed every CI apply with
"python3: command not found" and made every monitoring stack apply
fail in CI (worked locally because the dev VM has python3).

jq is already in the CI image and produces the same output.
2026-05-22 14:16:41 +00:00

237 lines
7.3 KiB
HCL

# resource "kubernetes_persistent_volume" "prometheus_grafana_pv" {
# metadata {
# name = "grafana-pv"
# }
# spec {
# capacity = {
# "storage" = "2Gi"
# }
# access_modes = ["ReadWriteOnce"]
# persistent_volume_source {
# nfs {
# path = "/mnt/main/grafana"
# server = var.nfs_server
# }
# # iscsi {
# # target_portal = "iscsi.viktorbarzin.lan:3260"
# # iqn = "iqn.2020-12.lan.viktorbarzin:storage:monitoring:grafana"
# # lun = 0
# # fs_type = "ext4"
# # }
# }
# }
# }
# Statically defined PersistentVolume for Alertmanager: a 2Gi NFS share
# mounted through the nfs.csi.k8s.io CSI driver.
resource "kubernetes_persistent_volume" "alertmanager_pv" {
  metadata {
    name = "alertmanager-pv"
  }

  spec {
    storage_class_name               = "nfs-truenas"
    persistent_volume_reclaim_policy = "Retain"
    access_modes                     = ["ReadWriteOnce"]

    capacity = {
      storage = "2Gi"
    }

    # Mount options handed to the NFS client for this volume.
    mount_options = ["soft", "timeo=30", "retrans=3", "actimeo=5"]

    persistent_volume_source {
      csi {
        driver        = "nfs.csi.k8s.io"
        volume_handle = "alertmanager-pv"

        # Export location on the NFS server.
        volume_attributes = {
          server = "192.168.1.127"
          share  = "/srv/nfs/alertmanager"
        }
      }
    }
  }
}
# resource "kubernetes_persistent_volume_claim" "grafana_pvc" {
# metadata {
# name = "grafana-pvc"
# namespace = kubernetes_namespace.monitoring.metadata[0].name
# }
# spec {
# access_modes = ["ReadWriteOnce"]
# resources {
# requests = {
# "storage" = "2Gi"
# }
# }
# }
# }
# Grafana database password, sourced from the Vault database engine (rotated
# automatically). An ExternalSecret copies the `password` property of
# static-creds/mysql-grafana into the `grafana-db-creds` Secret as
# GF_DATABASE_PASSWORD and re-reads it every 15 minutes, so the Secret follows
# password rotations.
resource "kubernetes_manifest" "grafana_db_creds" {
  manifest = {
    apiVersion = "external-secrets.io/v1beta1"
    kind       = "ExternalSecret"

    metadata = {
      namespace = kubernetes_namespace.monitoring.metadata[0].name
      name      = "grafana-db-creds"
    }

    spec = {
      refreshInterval = "15m"

      # Where the value is read from.
      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-database"
      }

      data = [
        {
          secretKey = "password"
          remoteRef = {
            key      = "static-creds/mysql-grafana"
            property = "password"
          }
        },
      ]

      # Shape of the Kubernetes Secret that gets written.
      target = {
        name = "grafana-db-creds"
        template = {
          data = {
            GF_DATABASE_PASSWORD = "{{ .password }}"
          }
        }
      }
    }
  }
}
locals {
  # Maps each dashboard JSON file name to the Grafana folder it is filed under.
  # Files that are not listed here end up in "General" (see the lookup default
  # in kubernetes_config_map.grafana_dashboards).
  dashboard_folders = {
    # --- Cluster & Kubernetes ---
    "api_server.json"         = "Cluster"
    "cluster_health.json"     = "Cluster"
    "nodes.json"              = "Cluster"
    "pods.json"               = "Cluster"
    "kube-state-metrics.json" = "Cluster"

    # --- Networking & DNS ---
    "core_dns.json"        = "Networking"
    "technitium-dns.json"  = "Networking"
    "nginx_ingress.json"   = "Networking"
    "network_traffic.json" = "Networking"

    # --- Hardware & Host ---
    "node_exporter_full.json"    = "Hardware"
    "proxmox_node_exporter.json" = "Hardware"
    "idrac.json"                 = "Hardware"
    "ups.json"                   = "Hardware"
    "nvidia.json"                = "Hardware"

    # --- Operations ---
    "backup_health.json" = "Operations"
    "registry.json"      = "Operations"
    "loki.json"          = "Operations"
    "k8s-audit.json"     = "Operations"

    # --- Applications ---
    "qbittorrent.json"        = "Applications"
    "realestate-crawler.json" = "Applications"
    "openclaw.json"           = "Applications"

    # --- Finance ---
    "uk-payslip.json"   = "Finance (Personal)"
    "wealth.json"       = "Finance (Personal)"
    "job-hunter.json"   = "Finance"
    "fire-planner.json" = "Finance"
  }

  # Folders only the Grafana admin user may open (anonymous Viewer and any
  # future non-admin users are denied). The ACL is applied by
  # null_resource.grafana_admin_only_folder_acl through the Grafana folder
  # permissions API once the dashboard sidecar has auto-created the folder.
  # Server-admin always retains access regardless of folder ACL.
  admin_only_folders = ["Finance (Personal)"]
}
# One ConfigMap per dashboard JSON file under ./dashboards. Each carries the
# grafana_dashboard = "1" label and a grafana_folder annotation naming the
# folder the dashboard belongs in.
resource "kubernetes_config_map" "grafana_dashboards" {
  # fileset() yields a set of file names, so each.key and each.value are equal.
  for_each = fileset("${path.module}/dashboards", "*.json")

  metadata {
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    # File name without ".json", underscores turned into dashes.
    name = "grafana-dashboard-${replace(trimsuffix(each.key, ".json"), "_", "-")}"

    labels = {
      grafana_dashboard = "1"
    }

    annotations = {
      # Dashboards missing from local.dashboard_folders go to "General".
      grafana_folder = try(local.dashboard_folders[each.key], "General")
    }
  }

  # Single entry: file name => raw dashboard JSON.
  data = {
    (each.key) = file("${path.module}/dashboards/${each.key}")
  }
}
# Lock down "admin only" folders via the Grafana folder permissions API.
# Default org-role inheritance gives Viewer + Editor read access to every
# folder; explicitly setting the folder ACL to {Admin: 4} overrides that
# inheritance so Viewer/Editor (incl. anonymous-Viewer) get no access.
# The Grafana super-admin (`admin` user) always retains access regardless.
#
# The script runs on the machine executing Terraform and calls bash, kubectl,
# jq, base64 and seq there; curl is executed inside the grafana container.
resource "null_resource" "grafana_admin_only_folder_acl" {
  for_each = toset(local.admin_only_folders)

  # timestamp() differs on every run, so the provisioner re-runs on each
  # tg apply (cheap, idempotent API call). Catches drift if anyone edits
  # permissions via the UI or the folder is rebuilt.
  triggers = {
    folder = each.value
    always = timestamp()
  }

  provisioner "local-exec" {
    interpreter = ["/bin/bash", "-c"]

    # Terraform values are handed to the script through the environment rather
    # than spliced into the script text: a folder title containing a quote or
    # a kubeconfig path containing whitespace can then neither break the
    # script nor inject shell code into it.
    environment = {
      FOLDER          = each.value
      KUBECONFIG_PATH = var.kube_config_path
      NAMESPACE       = kubernetes_namespace.monitoring.metadata[0].name
    }

    command = <<-EOT
      set -euo pipefail
      # Wrapper so the kubeconfig path is always passed as one quoted argument.
      kc() { kubectl --kubeconfig "$KUBECONFIG_PATH" "$@"; }
      POD=$(kc get pod -n "$NAMESPACE" -l app.kubernetes.io/name=grafana -o jsonpath='{.items[0].metadata.name}')
      ADMIN_PW=$(kc get secret -n "$NAMESPACE" grafana -o jsonpath='{.data.admin-password}' | base64 -d)
      # Wait up to 60s for the dashboard sidecar to materialise the folder.
      FOLDER_UID=""
      for i in $(seq 1 12); do
        # "|| true": a failed curl/jq means "not there yet", not a hard error.
        FOLDER_UID=$(kc exec -n "$NAMESPACE" "$POD" -c grafana -- \
          curl -sf -u "admin:$ADMIN_PW" "http://localhost:3000/api/folders" \
          | jq -r --arg t "$FOLDER" 'first(.[] | select(.title == $t) | .uid) // ""' || true)
        if [ -n "$FOLDER_UID" ]; then break; fi
        sleep 5
      done
      if [ -z "$FOLDER_UID" ]; then
        echo "ERROR: folder '$FOLDER' not found in Grafana after 60s" >&2
        exit 1
      fi
      # Admin-only ACL. permission codes: 1=View, 2=Edit, 4=Admin.
      kc exec -n "$NAMESPACE" "$POD" -c grafana -- \
        curl -sf -u "admin:$ADMIN_PW" -X POST \
        -H "Content-Type: application/json" \
        -d '{"items":[{"role":"Admin","permission":4}]}' \
        "http://localhost:3000/api/folders/$FOLDER_UID/permissions" >/dev/null
      echo "set admin-only ACL on folder '$FOLDER' (uid=$FOLDER_UID)"
    EOT
  }

  # Grafana must be installed and the dashboard ConfigMaps must exist before
  # the folder can show up.
  depends_on = [
    helm_release.grafana,
    kubernetes_config_map.grafana_dashboards,
  ]
}
# Grafana, installed from the upstream grafana/helm-charts repository. Chart
# values are rendered from grafana_chart_values.yaml with the admin password
# and MySQL host substituted in.
resource "helm_release" "grafana" {
  name       = "grafana"
  repository = "https://grafana.github.io/helm-charts"
  chart      = "grafana"

  namespace        = kubernetes_namespace.monitoring.metadata[0].name
  create_namespace = true

  atomic  = true
  timeout = 600

  values = [
    templatefile(
      "${path.module}/grafana_chart_values.yaml",
      {
        grafana_admin_password = var.grafana_admin_password
        mysql_host             = var.mysql_host
      },
    ),
  ]

  # The ExternalSecret for the DB credentials is created before the release.
  depends_on = [kubernetes_manifest.grafana_db_creds]
}