2026-03-17 21:34:11 +00:00
|
|
|
|
|
|
|
|
# ConfigMap holding the exporter's config.yml. The Deployment in this file
# mounts the "config.yml" key at /etc/prometheus/idrac.yml.
resource "kubernetes_config_map" "redfish-config" {
  metadata {
    name      = "redfish-exporter-config"
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    annotations = {
      # Stakater Reloader opt-in: pairs with the "reloader.stakater.com/search"
      # annotation on the Deployment so a change to this ConfigMap triggers a
      # rolling restart.
      "reloader.stakater.com/match" = "true"
    }
  }

  data = {
    # NOTE(review): the iDRAC password and the fallback root/calvin pair
    # (presumably the Dell factory defaults) are rendered in plaintext into a
    # ConfigMap. Consider moving them to a kubernetes_secret.
    # NOTE(review): YAML nesting follows the upstream idrac_exporter sample
    # config (hosts.<host>, hosts.default, metrics.*) - confirm against the
    # live ConfigMap.
    "config.yml" = <<-EOF
      address: 0.0.0.0
      port: 9610
      hosts:
        ${var.idrac_host}:
          username: ${var.idrac_username}
          password: ${var.idrac_password}
        default:
          username: root
          password: calvin
      metrics:
        all: true
        # system: true
        # sensors: true
        # power: true
        # sel: false # Disable SEL - often slow
        # storage: true # Disable storage - slowest endpoint
        # memory: true
        # network: false # Disable network adapters
        # firmware: false # Don't need this frequently
    EOF
  }
}
|
|
|
|
|
|
|
|
|
|
# Single-replica Deployment of the iDRAC Redfish exporter. The container
# listens on 9610 and reads its configuration from the redfish-exporter-config
# ConfigMap, mounted as a single file.
resource "kubernetes_deployment" "idrac-redfish" {
  metadata {
    name      = "idrac-redfish-exporter"
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    labels = {
      app  = "idrac-redfish-exporter"
      tier = var.tier
    }

    annotations = {
      # Stakater Reloader: restart this workload when a ConfigMap annotated
      # with "reloader.stakater.com/match" (the exporter config) changes.
      # Needed because the config is mounted via sub_path, and sub_path mounts
      # do not receive live ConfigMap updates.
      "reloader.stakater.com/search" = "true"
    }
  }

  spec {
    replicas = 1

    selector {
      match_labels = {
        app = "idrac-redfish-exporter"
      }
    }

    template {
      metadata {
        labels = {
          app = "idrac-redfish-exporter"
        }
      }

      spec {
        priority_class_name = "tier-1-cluster"

        container {
          # https://github.com/mrlhansen/idrac_exporter?tab=readme-ov-file
          # Patched v2.4.1 - restored missing idrac_power_supply_input_voltage metric
          # See: https://github.com/mrlhansen/idrac_exporter/issues/176
          image = "viktorbarzin/idrac-redfish-exporter:2.4.1-voltage-fix"
          name  = "redfish-exporter"

          port {
            # Must match "port" in the exporter's config.yml.
            container_port = 9610
          }

          volume_mount {
            name       = "redfish-exporter-config"
            mount_path = "/etc/prometheus/idrac.yml"
            sub_path   = "config.yml"
          }
        }

        volume {
          name = "redfish-exporter-config"

          config_map {
            # Reference the resource instead of repeating its name so Terraform
            # creates the ConfigMap before this Deployment (implicit dependency).
            name = kubernetes_config_map.redfish-config.metadata[0].name
          }
        }

        # Same value the Kyverno admission webhook stamps on every pod
        # (ndots=2, to prevent NxDomain search-domain flooding).
        dns_config {
          option {
            name  = "ndots"
            value = "2"
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    # Without this, Terraform would report dns_config drift on every plan and
    # try to strip the injected field.
    ignore_changes = [spec[0].template[0].spec[0].dns_config]
  }
}
|
|
|
|
|
|
|
|
|
|
# Service in front of the exporter pods: exposes port 9090 and forwards to the
# container's 9610 listener.
resource "kubernetes_service" "idrac-redfish-exporter" {
  metadata {
    name      = "idrac-redfish-exporter"
    namespace = kubernetes_namespace.monitoring.metadata[0].name

    labels = {
      app = "idrac-redfish-exporter"
    }

    # Annotation-based Prometheus scrape discovery is left disabled:
    # annotations = {
    #   "prometheus.io/scrape" = "true"
    #   "prometheus.io/path"   = "/metrics"
    #   "prometheus.io/port"   = "9090"
    # }
  }

  spec {
    port {
      name        = "http"
      port        = "9090"
      target_port = "9610"
    }

    # Selects the pods created by the idrac-redfish-exporter Deployment.
    selector = {
      app = "idrac-redfish-exporter"
    }
  }
}
|
|
|
|
|
|
|
|
|
|
# Ingress for the exporter Service (port 9090), reachable from local networks
# only.
module "idrac-redfish-exporter-ingress" {
  source = "../../../../modules/kubernetes/ingress_factory"

  # Identity and routing
  name            = "idrac-redfish-exporter"
  namespace       = kubernetes_namespace.monitoring.metadata[0].name
  root_domain     = "viktorbarzin.lan"
  tls_secret_name = var.tls_secret_name
  port            = 9090
  ssl_redirect    = false

  # Access policy.
  # Auth disabled: HA Sofia + Prometheus scrape this endpoint
  # programmatically (no browser, no SSO cookie). The
  # allow_local_access_only middleware (192.168.0.0/16 + 10.0.0.0/8)
  # already gates external access, so layering Authentik on top only
  # breaks the REST sensor in HA Sofia (it gets a 302 to authentik.viktorbarzin.me
  # and parses HTML instead of metrics).
  allow_local_access_only = true
  # auth = "none": HA Sofia REST sensors poll programmatically without cookies; Authentik OIDC flow incompatible with automation.
  auth = "none"
}
|