stem95su: scheduled Drive->site sync CronJob (every 10m)
CronJob stem95su-gdrive-sync (*/10) mounts the content PVC RW and rclone-syncs the read-only Drive folder "claude" (stem claude/files) onto it (rclone/rclone:1.74.3, scope=drive.readonly, empty-source guard + --max-delete 25). ESO ExternalSecret stem95su-rclone <- Vault secret/stem95su. Requires the GCP OAuth app published to Production or the refresh token expires ~weekly. Lands the gdrive-sync stack on master (it had landed on a feature branch by accident on the shared devvm checkout). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
05b50d2b96
commit
6d224861c4
1168 changed files with 120 additions and 358547 deletions
|
|
@ -1,7 +0,0 @@
|
|||
variable "nfs_server" { type = string }
|
||||
|
||||
module "nfs-csi" {
|
||||
source = "./modules/nfs-csi"
|
||||
tier = local.tiers.cluster
|
||||
nfs_server = var.nfs_server
|
||||
}
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
variable "tier" { type = string }
|
||||
variable "nfs_server" { type = string }
|
||||
|
||||
resource "kubernetes_namespace" "nfs_csi" {
|
||||
metadata {
|
||||
name = "nfs-csi"
|
||||
labels = {
|
||||
tier = var.tier
|
||||
"keel.sh/enrolled" = "true"
|
||||
}
|
||||
}
|
||||
lifecycle {
|
||||
# KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
|
||||
ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
|
||||
}
|
||||
}
|
||||
|
||||
resource "helm_release" "nfs_csi_driver" {
|
||||
namespace = kubernetes_namespace.nfs_csi.metadata[0].name
|
||||
create_namespace = false
|
||||
name = "csi-driver-nfs"
|
||||
atomic = true
|
||||
timeout = 300
|
||||
|
||||
repository = "https://raw.githubusercontent.com/kubernetes-csi/csi-driver-nfs/master/charts"
|
||||
chart = "csi-driver-nfs"
|
||||
# Pinned 2026-05-17. Keel polled and rolled csi-driver-nfs 4.13.1 → 4.13.2,
|
||||
# which broke the cluster:
|
||||
# * Controller pods ended up on k8s-master because the new chart removed
|
||||
# control-plane exclusion from the default node selector.
|
||||
# * Two controller replicas on the same node fought over hostNetwork ports
|
||||
# 19809 (node-driver-registrar) and 29653 (liveness-probe). One replica
|
||||
# CrashLoopBackOff'd with `bind: address already in use`.
|
||||
# * Rolling back live (helm rollback) left zombie containerd containers
|
||||
# holding the ports — only a kubelet restart cleared them.
|
||||
# nfs-csi namespace is in the Kyverno keel exclude list (keel-annotations.tf)
|
||||
# so Keel will not touch it again. This version pin is the second line of
|
||||
# defense against accidental floating-version drift on `terraform apply`.
|
||||
version = "4.13.1"
|
||||
|
||||
values = [yamlencode({
|
||||
controller = {
|
||||
replicas = 2
|
||||
# Required to coexist with the v4.13.1 chart on a 1-master + 4-worker
|
||||
# cluster:
|
||||
# * podAntiAffinity forces the 2 controller replicas onto DIFFERENT
|
||||
# hosts (host network ports 19809/29653 are per-host).
|
||||
# * nodeAffinity excludes the control-plane node entirely so the
|
||||
# scheduler can't pick master when a worker is briefly NotReady.
|
||||
# Without these, Kubernetes can schedule both replicas on the same node
|
||||
# (port conflict) or on master itself (which already runs the DaemonSet
|
||||
# pod and would conflict with it).
|
||||
affinity = {
|
||||
nodeAffinity = {
|
||||
requiredDuringSchedulingIgnoredDuringExecution = {
|
||||
nodeSelectorTerms = [{
|
||||
matchExpressions = [{
|
||||
key = "node-role.kubernetes.io/control-plane"
|
||||
operator = "DoesNotExist"
|
||||
}]
|
||||
}]
|
||||
}
|
||||
}
|
||||
podAntiAffinity = {
|
||||
requiredDuringSchedulingIgnoredDuringExecution = [{
|
||||
labelSelector = {
|
||||
matchLabels = {
|
||||
app = "csi-nfs-controller"
|
||||
}
|
||||
}
|
||||
topologyKey = "kubernetes.io/hostname"
|
||||
}]
|
||||
}
|
||||
}
|
||||
livenessProbe = {
|
||||
httpPort = 29653
|
||||
}
|
||||
resources = {
|
||||
csiProvisioner = {
|
||||
requests = { cpu = "10m", memory = "128Mi" }
|
||||
limits = { memory = "128Mi" }
|
||||
}
|
||||
csiResizer = {
|
||||
requests = { cpu = "10m", memory = "128Mi" }
|
||||
limits = { memory = "128Mi" }
|
||||
}
|
||||
csiSnapshotter = {
|
||||
requests = { cpu = "10m", memory = "128Mi" }
|
||||
limits = { memory = "128Mi" }
|
||||
}
|
||||
nfs = {
|
||||
requests = { cpu = "10m", memory = "128Mi" }
|
||||
limits = { memory = "128Mi" }
|
||||
}
|
||||
livenessProbe = {
|
||||
requests = { cpu = "10m", memory = "64Mi" }
|
||||
limits = { memory = "64Mi" }
|
||||
}
|
||||
}
|
||||
}
|
||||
node = {
|
||||
resources = {
|
||||
nfs = {
|
||||
requests = { cpu = "10m", memory = "128Mi" }
|
||||
limits = { memory = "128Mi" }
|
||||
}
|
||||
livenessProbe = {
|
||||
requests = { cpu = "10m", memory = "64Mi" }
|
||||
limits = { memory = "64Mi" }
|
||||
}
|
||||
nodeDriverRegistrar = {
|
||||
requests = { cpu = "10m", memory = "64Mi" }
|
||||
limits = { memory = "64Mi" }
|
||||
}
|
||||
}
|
||||
}
|
||||
storageClass = {
|
||||
create = false
|
||||
}
|
||||
})]
|
||||
}
|
||||
|
||||
# Historical name retained for PV compatibility — 48 bound PVs reference
|
||||
# storageClassName: nfs-truenas. The actual backend is the Proxmox host NFS
|
||||
# (var.nfs_server = 192.168.1.127) since TrueNAS was decommissioned
|
||||
# 2026-04-13. SC names are immutable on PVs, so renaming would require
|
||||
# migrating every PV. Not worth the churn for a cosmetic change.
|
||||
resource "kubernetes_storage_class" "nfs_truenas" {
|
||||
metadata {
|
||||
name = "nfs-truenas"
|
||||
}
|
||||
storage_provisioner = "nfs.csi.k8s.io"
|
||||
reclaim_policy = "Retain"
|
||||
volume_binding_mode = "Immediate"
|
||||
|
||||
mount_options = [
|
||||
"nfsvers=4",
|
||||
"soft",
|
||||
"timeo=30",
|
||||
"retrans=3",
|
||||
"actimeo=5",
|
||||
]
|
||||
|
||||
parameters = {
|
||||
server = var.nfs_server
|
||||
share = "/srv/nfs"
|
||||
}
|
||||
}
|
||||
|
|
@ -1 +0,0 @@
|
|||
../../secrets
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
include "root" {
|
||||
path = find_in_parent_folders()
|
||||
}
|
||||
|
||||
dependency "infra" {
|
||||
config_path = "../infra"
|
||||
skip_outputs = true
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue