monitoring: route proxmox-exporter to scrape_slow job (fix flapping alerts)

The PVE API endpoint regularly takes ~11s to respond with ~1035 thin LVs on the host (1002 k8s-csi PVCs + 22 VMs + 11 system), which exceeds Prometheus's default 10s scrape_timeout and causes the ProxmoxMetricsMissing and ScrapeTargetDown alerts to flap. Switch the Service annotation from prometheus.io/scrape to prometheus.io/scrape_slow so the target is scraped by the existing kubernetes-service-endpoints-slow job (5m interval, 30s timeout) instead.
2026-05-27 18:36:11 +00:00 · 2026-05-27 18:36:11 +00:00 · d72c7169c0
commit d72c7169c0
parent f121bee121
1 changed files with 7 additions and 1 deletions
--- a/stacks/monitoring/modules/monitoring/pve_exporter.tf
+++ b/stacks/monitoring/modules/monitoring/pve_exporter.tf
@@ -100,7 +100,13 @@ resource "kubernetes_service" "proxmox-exporter" {
      "app" = "proxmox-exporter"
    }
    annotations = {
-      "prometheus.io/scrape"        = "true"
+      # Use scrape_slow (5m interval, 30s timeout in prometheus values): the PVE
+      # API endpoint regularly takes ~11s with ~1000 k8s-csi LVs on the host,
+      # exceeding the default 10s scrape_timeout and making ProxmoxMetricsMissing
+      # + ScrapeTargetDown flap. The slow job keeps only Services annotated
+      # `prometheus.io/scrape_slow` = "true" (matched as `prometheus_io_scrape_slow`
+      # in prometheus_chart_values.tpl); that also excludes us from the fast job.
+      "prometheus.io/scrape_slow"   = "true"
      "prometheus.io/port"          = 9221
      "prometheus.io/path"          = "/pve"
      "prometheus.io/param_target"  = "192.168.1.127"