2026-01-17 22:40:35 +00:00
|
|
|
# TLS secret consumed by the ingress module and by the unauthenticated
# stream ingress defined in this file.
variable "tls_secret_name" {
  description = "Name of the Kubernetes TLS secret used by the ingresses of this module."
  type        = string
}
|
|
|
|
|
# Instance name. It is interpolated into every object name as
# "music-<name>" and into the ingress host "music-<name>.viktorbarzin.me".
variable "name" {
  description = "Instance name; objects are named music-<name> and served at music-<name>.viktorbarzin.me."
  type        = string
}
|
|
|
|
|
# Image tag of the freedify container. The same value feeds the
# diun.include_tags annotation on the pod template.
variable "tag" {
  description = "Image tag of the freedify container image."
  type        = string
  default     = "latest"
}
|
|
|
|
|
# Value of the "tier" label placed on the deployment's metadata.
variable "tier" {
  type = string
}
|
|
|
|
|
# Passed straight through to the `protected` argument of the ingress module.
# Off unless the caller opts in.
variable "protected" {
  default = false
  type    = bool
}
|
|
|
|
|
# Exposed to the container as the LISTENBRAINZ_TOKEN environment variable.
# Optional: defaults to null.
variable "listenbrainz_token" {
  sensitive = true
  type      = string
  default   = null
}
|
|
|
|
|
# Exposed to the container as the GENIUS_ACCESS_TOKEN environment variable.
# Optional: defaults to null.
variable "genius_token" {
  sensitive = true
  type      = string
  default   = null
}
|
|
|
|
|
# Exposed to the container as the DAB_VISITOR_ID environment variable.
# Optional: defaults to null.
# NOTE(review): marked sensitive to match the other credential variables in
# this file; presumably this is paired with dab_session for authentication -
# confirm, and drop the flag if it is only a non-secret identifier.
variable "dab_visitor_id" {
  description = "DAB visitor id passed to the container as DAB_VISITOR_ID."
  type        = string
  default     = null
  sensitive   = true
}
|
|
|
|
|
# Exposed to the container as the DAB_SESSION environment variable.
# Optional: defaults to null.
# Marked sensitive so the session value is redacted from plan/apply output,
# in line with the other token variables declared in this file.
variable "dab_session" {
  description = "DAB session value passed to the container as DAB_SESSION."
  type        = string
  default     = null
  sensitive   = true
}
|
|
|
|
|
# Exposed to the container as the GEMINI_API_KEY environment variable.
# Optional: defaults to null.
variable "gemini_api_key" {
  sensitive = true
  type      = string
  default   = null
}
|
|
|
|
|
# Memory limit of the freedify container (resources.limits.memory).
variable "memory_limit" {
  default = "384Mi"
  type    = string
}
|
|
|
|
|
# CPU request of the freedify container (resources.requests.cpu).
# NOTE(review): the 15m default appears to come from a cluster-wide
# right-sizing pass based on VPA recommendations and live metrics - confirm
# against current usage before changing it.
variable "cpu_request" {
  default = "15m"
  type    = string
}
|
|
|
|
|
# Memory request of the freedify container (resources.requests.memory).
variable "memory_request" {
  default = "256Mi"
  type    = string
}
|
2026-03-07 16:41:36 +00:00
|
|
|
# Additional annotations forwarded to the ingress module; none by default.
variable "extra_annotations" {
  default = {}
  type    = map(string)
}
|
2026-04-06 11:57:47 +03:00
|
|
|
# Exposed to the container as the NAVIDROME_SCAN_URL environment variable.
# Defaults to the empty string and is redacted from plan output.
variable "navidrome_scan_url" {
  sensitive = true
  type      = string
  default   = ""
}
|
|
|
|
|
# Exposed to the container as the HA_SOFIA_URL environment variable.
# Defaults to the empty string.
variable "ha_sofia_url" {
  default = ""
  type    = string
}
|
|
|
|
|
# Exposed to the container as the HA_SOFIA_TOKEN environment variable.
# Defaults to the empty string and is redacted from plan output.
variable "ha_sofia_token" {
  sensitive = true
  type      = string
  default   = ""
}
|
|
|
|
|
# NFS server that backs the "music-library" volume of the deployment.
variable "nfs_music_server" {
  default = "192.168.1.127"
  type    = string
}
|
|
|
|
|
# Export path on the NFS server that is mounted as the "music-library" volume.
variable "nfs_music_path" {
  default = "/srv/nfs/freedify-music"
  type    = string
}
|
2026-01-17 22:40:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# Single-replica freedify deployment named "music-<name>" in the "freedify"
# namespace. The container listens on port 8000 and mounts the NFS-backed
# music library at /music-library.
resource "kubernetes_deployment" "freedify" {
  metadata {
    name      = "music-${var.name}"
    namespace = "freedify"
    labels = {
      app  = "music-${var.name}"
      tier = var.tier
    }
  }

  spec {
    replicas = 1

    strategy {
      type = "RollingUpdate"
    }

    selector {
      match_labels = {
        app = "music-${var.name}"
      }
    }

    template {
      metadata {
        annotations = {
          "diun.enable" = "true"
          # include_tags is evaluated as a regular expression. "." is the only
          # regex metacharacter that is legal in an image tag, so it is escaped
          # to keep e.g. "1.2.3" from also matching "1x2y3".
          "diun.include_tags" = "^${replace(var.tag, ".", "\\.")}$"
        }
        labels = {
          app = "music-${var.name}"
        }
      }

      spec {
        image_pull_secrets {
          name = "registry-credentials"
        }

        container {
          # Phase 3 cutover 2026-05-07 — Forgejo registry consolidation.
          image = "forgejo.viktorbarzin.me/viktor/freedify:${var.tag}"
          name  = "freedify"

          port {
            container_port = 8000
          }

          # NOTE(review): the token/key variables below are injected as plain
          # env values, so they end up readable in the pod spec. Consider
          # moving them to a Secret referenced through value_from.
          env {
            name  = "LISTENBRAINZ_TOKEN"
            value = var.listenbrainz_token
          }
          env {
            name  = "GENIUS_ACCESS_TOKEN"
            value = var.genius_token
          }
          env {
            name  = "DAB_SESSION"
            value = var.dab_session
          }
          env {
            name  = "DAB_VISITOR_ID"
            value = var.dab_visitor_id
          }
          env {
            name  = "GEMINI_API_KEY"
            value = var.gemini_api_key
          }
          env {
            # Must stay in sync with the volume_mount path below.
            name  = "MUSIC_LIBRARY_PATH"
            value = "/music-library"
          }
          env {
            name  = "AUTO_SAVE_TO_LIBRARY"
            value = "true"
          }
          env {
            name  = "NAVIDROME_SCAN_URL"
            value = var.navidrome_scan_url
          }
          env {
            name  = "HA_SOFIA_URL"
            value = var.ha_sofia_url
          }
          env {
            name  = "HA_SOFIA_TOKEN"
            value = var.ha_sofia_token
          }

          volume_mount {
            name       = "music-library"
            mount_path = "/music-library"
          }

          # Only a memory limit is set; CPU has a request but no limit.
          resources {
            limits = {
              memory = var.memory_limit
            }
            requests = {
              cpu    = var.cpu_request
              memory = var.memory_request
            }
          }

          readiness_probe {
            http_get {
              path = "/"
              port = 8000
            }
            initial_delay_seconds = 5
            period_seconds        = 10
          }

          liveness_probe {
            http_get {
              path = "/"
              port = 8000
            }
            initial_delay_seconds = 10
            period_seconds        = 30
            failure_threshold     = 3
          }
        }

        volume {
          name = "music-library"
          nfs {
            server = var.nfs_music_server
            path   = var.nfs_music_path
          }
        }
      }
    }
  }

  # Kyverno's admission webhook mutates the pod dns_config (ndots override);
  # ignoring that attribute keeps the plan free of perpetual drift.
  lifecycle {
    ignore_changes = [spec[0].template[0].spec[0].dns_config] # KYVERNO_LIFECYCLE_V1
  }
}
|
|
|
|
|
|
|
|
|
|
# Service "music-<name>" in the "freedify" namespace: forwards port 80 to
# port 8000 on the pods selected by the app label.
resource "kubernetes_service" "freedify" {
  metadata {
    namespace = "freedify"
    name      = "music-${var.name}"
    labels = {
      app = "music-${var.name}"
    }
  }

  spec {
    port {
      name        = "http"
      target_port = 8000
      port        = 80
    }

    selector = {
      app = "music-${var.name}"
    }
  }
}
|
|
|
|
|
|
|
|
|
|
# Main ingress for the instance, built by the shared ingress_factory module.
# Protection and extra annotations are controlled by the caller.
module "ingress" {
  source = "../../../modules/kubernetes/ingress_factory"

  name              = "music-${var.name}"
  namespace         = "freedify"
  dns_type          = "non-proxied"
  tls_secret_name   = var.tls_secret_name
  extra_annotations = var.extra_annotations
  protected         = var.protected
}
|
2026-04-06 11:57:47 +03:00
|
|
|
|
|
|
|
|
# Ingress without authentication for the /api/stream/ prefix, so that AirPlay
# receivers can fetch audio directly. It routes to port 80 of the
# "music-<name>" service on the websecure entrypoint.
resource "kubernetes_ingress_v1" "stream-noauth" {
  metadata {
    namespace = "freedify"
    name      = "music-${var.name}-stream"
    annotations = {
      "traefik.ingress.kubernetes.io/router.priority"    = "100"
      "traefik.ingress.kubernetes.io/router.entrypoints" = "websecure"
      "traefik.ingress.kubernetes.io/router.middlewares" = "traefik-retry@kubernetescrd,traefik-rate-limit@kubernetescrd"
    }
  }

  spec {
    ingress_class_name = "traefik"

    tls {
      secret_name = var.tls_secret_name
      hosts       = ["music-${var.name}.viktorbarzin.me"]
    }

    rule {
      host = "music-${var.name}.viktorbarzin.me"

      http {
        path {
          path_type = "Prefix"
          path      = "/api/stream/"

          backend {
            service {
              name = "music-${var.name}"

              port {
                number = 80
              }
            }
          }
        }
      }
    }
  }
}
|