fix(technitium): migrate primary to proxmox-lvm-encrypted + post-mortem

SEV1 outage: fsid=0 in PVE /etc/exports broke all NFS subdirectory
mounts from k8s (NFSv4 pseudo-root path resolution). Combined with a
lockd failure, this left both the NFSv4 and NFSv3 mount paths broken.
The outage cascaded into the DNS primary, Vault (2/3 pods),
Alertmanager, and 20+ services.

Changes:
- Primary PVC: NFS (nfs-truenas) → proxmox-lvm-encrypted
- Secondary/tertiary PVCs: proxmox-lvm → proxmox-lvm-encrypted
- Removed NFS module dependency from technitium stack
- Added full post-mortem with prevention plan

[ci skip]

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-04-14 08:18:59 +00:00
parent b239af9b6d
commit 68c8c5b4a0
4 changed files with 173 additions and 19 deletions

View file

@ -6,10 +6,10 @@
# Both pods share the `dns-server=true` label so the DNS LoadBalancer
# in main.tf routes queries to whichever pod is healthy.
resource "kubernetes_persistent_volume_claim" "secondary_config_proxmox" {
resource "kubernetes_persistent_volume_claim" "secondary_config_encrypted" {
wait_until_bound = false
metadata {
name = "technitium-secondary-config-proxmox"
name = "technitium-secondary-config-encrypted"
namespace = kubernetes_namespace.technitium.metadata[0].name
annotations = {
"resize.topolvm.io/threshold" = "80%"
@ -19,7 +19,7 @@ resource "kubernetes_persistent_volume_claim" "secondary_config_proxmox" {
}
spec {
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm"
storage_class_name = "proxmox-lvm-encrypted"
resources {
requests = {
storage = "2Gi"
@ -153,7 +153,7 @@ resource "kubernetes_deployment" "technitium_secondary" {
volume {
name = "nfs-config"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.secondary_config_proxmox.metadata[0].name
claim_name = kubernetes_persistent_volume_claim.secondary_config_encrypted.metadata[0].name
}
}
dns_config {
@ -190,10 +190,10 @@ resource "kubernetes_service" "technitium_secondary_web" {
}
# Tertiary DNS deployment: another zone-transfer replica for ETP=Local coverage
resource "kubernetes_persistent_volume_claim" "tertiary_config_proxmox" {
resource "kubernetes_persistent_volume_claim" "tertiary_config_encrypted" {
wait_until_bound = false
metadata {
name = "technitium-tertiary-config-proxmox"
name = "technitium-tertiary-config-encrypted"
namespace = kubernetes_namespace.technitium.metadata[0].name
annotations = {
"resize.topolvm.io/threshold" = "80%"
@ -203,7 +203,7 @@ resource "kubernetes_persistent_volume_claim" "tertiary_config_proxmox" {
}
spec {
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm"
storage_class_name = "proxmox-lvm-encrypted"
resources {
requests = {
storage = "2Gi"
@ -304,7 +304,7 @@ resource "kubernetes_deployment" "technitium_tertiary" {
volume {
name = "config"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.tertiary_config_proxmox.metadata[0].name
claim_name = kubernetes_persistent_volume_claim.tertiary_config_encrypted.metadata[0].name
}
}
dns_config {

View file

@ -1,7 +1,6 @@
variable "tls_secret_name" {}
variable "tier" { type = string }
variable "homepage_token" {}
variable "nfs_server" { type = string }
variable "mysql_host" { type = string }
variable "postgresql_host" { type = string }
variable "technitium_username" { type = string }
@ -84,12 +83,26 @@ resource "kubernetes_config_map" "coredns" {
}
}
module "nfs_config_host" {
source = "../../../../modules/kubernetes/nfs_volume"
name = "technitium-config-host"
namespace = kubernetes_namespace.technitium.metadata[0].name
nfs_server = "192.168.1.127"
nfs_path = "/srv/nfs/technitium"
# PersistentVolumeClaim for the primary Technitium config volume.
# Requests 2Gi of ReadWriteOnce storage from the "proxmox-lvm-encrypted"
# storage class in the technitium namespace.
resource "kubernetes_persistent_volume_claim" "primary_config_encrypted" {
# Do not make Terraform wait for the claim to reach the Bound state.
# NOTE(review): presumably the storage class only binds once a pod consumes
# the claim — confirm against the StorageClass volumeBindingMode.
wait_until_bound = false
metadata {
name = "technitium-primary-config-encrypted"
namespace = kubernetes_namespace.technitium.metadata[0].name
# NOTE(review): these resize.topolvm.io/* annotations look like PVC
# auto-resize settings (threshold 80%, increase 100%, limit 5Gi) — confirm
# a resizer that honours them is deployed and covers this storage class.
annotations = {
"resize.topolvm.io/threshold" = "80%"
"resize.topolvm.io/increase" = "100%"
"resize.topolvm.io/storage_limit" = "5Gi"
}
}
spec {
# ReadWriteOnce: the volume can be mounted read-write by a single node.
access_modes = ["ReadWriteOnce"]
storage_class_name = "proxmox-lvm-encrypted"
resources {
requests = {
# Initial requested size.
storage = "2Gi"
}
}
}
}
resource "kubernetes_deployment" "technitium" {
@ -186,7 +199,7 @@ resource "kubernetes_deployment" "technitium" {
volume {
name = "nfs-config"
persistent_volume_claim {
claim_name = module.nfs_config_host.claim_name
claim_name = kubernetes_persistent_volume_claim.primary_config_encrypted.metadata[0].name
}
}
volume {
@ -490,7 +503,7 @@ resource "kubernetes_cron_job_v1" "technitium_password_sync" {
volume {
name = "technitium-data"
persistent_volume_claim {
claim_name = module.nfs_config_host.claim_name
claim_name = kubernetes_persistent_volume_claim.primary_config_encrypted.metadata[0].name
}
}
restart_policy = "OnFailure"