infra/stacks/platform/modules/monitoring/prometheus.tf
Viktor Barzin 89a6e08245 [ci skip] Infrastructure hardening: security, monitoring, reliability, maintainability
Phase 1 - Critical Security:
- Netbox: move hardcoded DB/superuser passwords to variables
- MeshCentral: disable public registration, add Authentik auth
- Traefik: disable insecure API dashboard (api.insecure=false)
- Traefik: configure forwarded headers with Cloudflare trusted IPs

Phase 2 - Security Hardening:
- Add security headers middleware (HSTS, X-Frame-Options, nosniff, etc.)
- Add Kyverno pod security policies in audit mode (privileged, host
  namespaces, SYS_ADMIN, trusted registries)
- Tighten rate limiting (avg=10, burst=50)
- Add Authentik protection to grampsweb

Phase 3 - Monitoring & Alerting:
- Add critical service alerts (PostgreSQL, MySQL, Redis, Headscale,
  Authentik, Loki)
- Increase Loki retention from 7 to 30 days (720h)
- Add predictive PV filling alert (predict_linear)
- Re-enable Hackmd and Privatebin down alerts

Phase 4 - Reliability:
- Add resource requests/limits to Redis, DBaaS, Technitium, Headscale,
  Vaultwarden, Uptime Kuma
- Increase Alloy DaemonSet memory to 512Mi/1Gi

Phase 6 - Maintainability:
- Extract duplicated tiers locals to terragrunt.hcl generate block
  (removed from 67 stacks)
- Replace hardcoded NFS IP 10.0.10.15 with var.nfs_server (114
  instances across 63 files)
- Replace hardcoded Redis/PostgreSQL/MySQL/Ollama/mail host references
  with variables across ~35 stacks
- Migrate xray raw ingress resources to ingress_factory modules
2026-02-23 22:05:28 +00:00

59 lines
1.6 KiB
HCL

resource "kubernetes_persistent_volume_claim" "prometheus_server_pvc" {
metadata {
name = "prometheus-iscsi-pvc"
namespace = kubernetes_namespace.monitoring.metadata[0].name
}
spec {
access_modes = ["ReadWriteOnce"]
resources {
requests = {
storage = "15Gi"
}
}
# storage_class_name = "standard"
volume_name = "prometheus-iscsi-pv"
}
}
resource "kubernetes_persistent_volume" "prometheus_server_pvc" {
metadata {
name = "prometheus-iscsi-pv"
}
spec {
capacity = {
storage = "15Gi"
}
access_modes = ["ReadWriteOnce"]
persistent_volume_source {
nfs {
path = "/mnt/main/prometheus"
server = var.nfs_server
}
# iscsi {
# fs_type = "ext4"
# iqn = "iqn.2020-12.lan.viktorbarzin:storage:monitoring:prometheus"
# lun = 0
# target_portal = "iscsi.viktorbarzin.me:3260"
# }
}
persistent_volume_reclaim_policy = "Retain"
volume_mode = "Filesystem"
}
}
resource "helm_release" "prometheus" {
namespace = kubernetes_namespace.monitoring.metadata[0].name
create_namespace = true
name = "prometheus"
repository = "https://prometheus-community.github.io/helm-charts"
chart = "prometheus"
# version = "15.0.2"
version = "25.8.2"
values = [templatefile("${path.module}/prometheus_chart_values.tpl", { alertmanager_mail_pass = var.alertmanager_account_password, alertmanager_slack_api_url = var.alertmanager_slack_api_url, tuya_api_key = var.tiny_tuya_service_secret, haos_api_token = var.haos_api_token })]
}