stem95su: retire the in-cluster serving stack — now a Valia site on Pages

Completes the ADR-0018 cutover. The stack is emptied to a tombstone so
CI destroys nginx, the NFS content volume, the ingress, the per-site
gdrive-sync CronJob and the namespace; serving + sync are owned by
stacks/valia-sites since the cutover commits. Catalog + runbook updated
to the migrated state (incl. the one-time 42.9→21.4MB video compression
Viktor approved).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-07-03 15:22:32 +00:00
parent 974c9976e3
commit 21c6e7112e
5 changed files with 18 additions and 317 deletions

View file

@ -1,122 +0,0 @@
# Automatic Google Drive -> site sync (added 2026-06-09; supersedes the
# earlier on-demand-only model now that content is actively maintained).
#
# A CronJob mirrors the READ-ONLY Drive folder "claude" (servable content in
# subfolder "stem claude/files/") onto the NFS content volume every 10 min via
# rclone. rclone is delta-aware: an unchanged run lists ~33 files' metadata and
# transfers nothing, so the schedule is cheap (not a 24MB re-download). nginx
# keeps serving the same volume read-only; updates appear within ~5s (actimeo).
#
# Drive is treated strictly READ-ONLY: scope=drive.readonly and rclone only ever
# reads the remote (sync gdrive: -> /data), never writes back.
#
# TOKEN LONGEVITY: the GCP OAuth app (project home-lab-1700868541205) MUST be
# published to "Production" or its refresh token expires ~weekly and this job
# fails. After publishing, re-mint the token and refresh
# `secret/stem95su.rclone_conf`. A failed run surfaces as a failed Job.
# ExternalSecret that pulls the `rclone_conf` property of the `stem95su` entry
# from the `vault-kv` ClusterSecretStore and exposes it as key `rclone.conf` in
# the Kubernetes Secret `stem95su-rclone` (refreshInterval: 1h). The sync
# CronJob mounts that Secret as its rclone configuration.
resource "kubernetes_manifest" "rclone_external_secret" {
  # Explicit ordering on the namespace (it is also referenced in metadata below).
  depends_on = [kubernetes_namespace.stem95su]

  manifest = {
    apiVersion = "external-secrets.io/v1"
    kind       = "ExternalSecret"

    metadata = {
      namespace = kubernetes_namespace.stem95su.metadata[0].name
      name      = "stem95su-rclone"
    }

    spec = {
      refreshInterval = "1h"

      # Cluster-wide store backed by Vault KV.
      secretStoreRef = {
        kind = "ClusterSecretStore"
        name = "vault-kv"
      }

      # Name of the Kubernetes Secret that gets materialised.
      target = {
        name = "stem95su-rclone"
      }

      data = [
        {
          secretKey = "rclone.conf"
          remoteRef = {
            key      = "stem95su"
            property = "rclone_conf"
          }
        },
      ]
    }
  }

  # NOTE(review): force_conflicts presumably lets Terraform take over fields
  # owned by another field manager on apply -- confirm against provider docs.
  field_manager {
    force_conflicts = true
  }
}
# CronJob that mirrors the Drive folder "stem claude/files" onto the NFS content
# volume (mounted at /data) every 10 minutes with rclone.
#
# - concurrency_policy = "Forbid": a new run is skipped while the previous one
#   is still active, so two syncs never write the volume at the same time.
# - The rclone config comes from the Secret `stem95su-rclone` (mounted read-only
#   at /config) and is copied to /tmp before use.
# - Finished Jobs are garbage-collected after 24h (ttl_seconds_after_finished).
resource "kubernetes_cron_job_v1" "gdrive_sync" {
  metadata {
    name      = "stem95su-gdrive-sync"
    namespace = kubernetes_namespace.stem95su.metadata[0].name
    labels    = { run = "stem95su", component = "gdrive-sync" }
  }

  spec {
    schedule                      = "*/10 * * * *"
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 2
    failed_jobs_history_limit     = 3

    job_template {
      metadata {}

      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 86400

        template {
          metadata { labels = { run = "stem95su", component = "gdrive-sync" } }

          spec {
            restart_policy = "OnFailure"

            container {
              name  = "rclone"
              image = "docker.io/rclone/rclone:1.74.3"

              # Mirror Drive folder -> /data. Guard: hard-fail on auth/list error
              # (so an expired token is visible); skip quietly if the source is
              # empty / missing the dashboard (never wipe the live site);
              # --max-delete caps catastrophic deletes from a partial listing.
              #
              # The dashboard check uses `grep -qxF` (fixed string, whole line):
              # without -F the "." in "stem_board.html" is a regex wildcard and
              # the guard could be satisfied by a differently named file.
              #
              # NOTE(review): the config is copied to /tmp presumably because
              # rclone wants a writable config file (token refresh) and /config
              # is a read-only Secret mount -- confirm.
              command = ["/bin/sh", "-c", <<-EOT
              set -eu
              cp /config/rclone.conf /tmp/rc.conf
              SRC="gdrive:stem claude/files"
              LIST=$(rclone --config /tmp/rc.conf lsf "$SRC" --files-only) || { echo "FATAL: Drive list failed (auth/network)"; exit 1; }
              N=$(printf '%s\n' "$LIST" | grep -c . || true)
              if [ "$N" -lt 1 ] || ! printf '%s\n' "$LIST" | grep -qxF "stem_board.html"; then
              echo "GUARD: source N=$N / stem_board.html missing -- skipping, site untouched"; exit 0
              fi
              echo "source OK ($N files) -- mirroring to /data"
              rclone --config /tmp/rc.conf sync "$SRC" /data --exclude ".DS_Store" --fast-list --transfers 4 --max-delete 25 -v
              EOT
              ]

              resources {
                requests = { cpu = "10m", memory = "64Mi" }
                limits   = { memory = "192Mi" }
              }

              # rclone.conf from the ExternalSecret-managed Secret.
              volume_mount {
                name       = "rclone-config"
                mount_path = "/config"
                read_only  = true
              }

              # Sync destination: the NFS content claim (mounted writable here).
              volume_mount {
                name       = "content"
                mount_path = "/data"
              }
            }

            volume {
              name = "rclone-config"
              secret { secret_name = "stem95su-rclone" }
            }

            volume {
              name = "content"
              persistent_volume_claim {
                claim_name = module.nfs_content.claim_name
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }

  # The Secret mounted above is produced by this ExternalSecret.
  depends_on = [kubernetes_manifest.rclone_external_secret]
}

View file

@ -1,176 +1,9 @@
# STEM educational platform for 95. СУ "Проф. Иван Шишманов" (Sofia).
# Public, open static site at stem95su.viktorbarzin.me. Self-contained HTML
# pages + media authored externally (Gemini exports), served by a stock nginx
# straight off the PVE host NFS — NOT baked into an image, so content can be
# updated out-of-band (Nextcloud "PVE NFS Pool" or rsync to /srv/nfs/stem-site)
# without a rebuild. Auto-backed-up offsite by the existing nfs-mirror job.
# Namespace that holds every stem95su object. Labelled with
# istio-injection=disabled and the aux tier from local.tiers.
resource "kubernetes_namespace" "stem95su" {
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
    ignore_changes = [
      metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"],
    ]
  }

  metadata {
    name = "stem95su"

    labels = {
      tier              = local.tiers.aux
      "istio-injection" = "disabled"
    }
  }
}
# TLS secret setup for the stem95su namespace via the shared setup_tls_secret
# module; the secret name is supplied by var.tls_secret_name.
module "tls_secret" {
  source = "../../modules/kubernetes/setup_tls_secret"

  tls_secret_name = var.tls_secret_name
  namespace       = kubernetes_namespace.stem95su.metadata[0].name
}
# Content lives on the PVE host NFS. NOTE: the nfs_volume module creates only
# the K8s PV+PVC — the export subdir (/srv/nfs/stem-site) must already exist on
# 192.168.1.127 or the pod fails to mount (mount.nfs exit 32). It is created
# during deploy and re-created on demand if ever lost.
# NFS-backed volume claim (1Gi, ReadWriteMany) for the site content, exported
# from /srv/nfs/stem-site on var.nfs_server. Consumers reference
# module.nfs_content.claim_name.
module "nfs_content" {
  source = "../../modules/kubernetes/nfs_volume"

  namespace = kubernetes_namespace.stem95su.metadata[0].name
  name      = "stem95su-content"

  nfs_server = var.nfs_server
  nfs_path   = "/srv/nfs/stem-site"

  access_modes = ["ReadWriteMany"]
  storage      = "1Gi"
}
# Minimal nginx server block: serve the static dir, with the dashboard
# (stem_board.html) as the directory index so "/" loads the platform home.
# All other pages/assets are reached by their exact filenames (the dashboard
# links to them by name — those must not be renamed).
# ConfigMap carrying the nginx `default.conf` that the Deployment mounts at
# /etc/nginx/conf.d: one server on port 80 rooted at /usr/share/nginx/html,
# with stem_board.html tried before index.html as the directory index.
resource "kubernetes_config_map" "nginx_conf" {
  metadata {
    namespace = kubernetes_namespace.stem95su.metadata[0].name
    name      = "stem95su-nginx-conf"
  }

  data = {
    "default.conf" = <<-EOT
    server {
    listen 80;
    server_name _;
    root /usr/share/nginx/html;
    index stem_board.html index.html;
    }
    EOT
  }
}
# Single-replica nginx Deployment serving the site. The NFS content claim is
# mounted read-only as the web root and the nginx-conf ConfigMap is mounted
# read-only at /etc/nginx/conf.d. Pods carry the label run=stem95su, which the
# Service selects on.
resource "kubernetes_deployment" "stem95su" {
  metadata {
    namespace = kubernetes_namespace.stem95su.metadata[0].name
    name      = "stem95su"
    labels    = { run = "stem95su", tier = local.tiers.aux }
  }

  spec {
    replicas = 1

    selector {
      match_labels = { run = "stem95su" }
    }

    template {
      metadata {
        labels = { run = "stem95su" }
      }

      spec {
        # Site content from the NFS-backed claim.
        volume {
          name = "content"
          persistent_volume_claim { claim_name = module.nfs_content.claim_name }
        }

        # Server block from the ConfigMap.
        volume {
          name = "nginx-conf"
          config_map { name = kubernetes_config_map.nginx_conf.metadata[0].name }
        }

        container {
          name  = "nginx"
          image = "nginx:1.28-alpine"

          port {
            container_port = 80
          }

          resources {
            requests = { cpu = "10m", memory = "64Mi" }
            limits   = { memory = "64Mi" }
          }

          volume_mount {
            name       = "content"
            mount_path = "/usr/share/nginx/html"
            read_only  = true
          }

          volume_mount {
            name       = "nginx-conf"
            mount_path = "/etc/nginx/conf.d"
            read_only  = true
          }

          # Ready once "/" (the dashboard index) answers on port 80.
          readiness_probe {
            initial_delay_seconds = 3
            period_seconds        = 10

            http_get {
              path = "/"
              port = 80
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1
    ignore_changes = [spec[0].template[0].spec[0].dns_config]
  }
}
# Service exposing the nginx pods (selector run=stem95su) on port 80 under the
# port name "http"; the ingress module targets it by name.
resource "kubernetes_service" "stem95su" {
  metadata {
    namespace = kubernetes_namespace.stem95su.metadata[0].name
    name      = "stem95su"
    labels    = { run = "stem95su" }
  }

  spec {
    selector = { run = "stem95su" }

    port {
      name        = "http"
      target_port = "80"
      port        = "80"
    }
  }
}
# Ingress for host "stem95su", routed to the stem95su Service on port 80 through
# the shared ingress_factory module.
module "ingress" {
  source = "../../modules/kubernetes/ingress_factory"

  namespace    = kubernetes_namespace.stem95su.metadata[0].name
  name         = "stem95su"
  host         = "stem95su"
  service_name = kubernetes_service.stem95su.metadata[0].name
  port         = "80"

  tls_secret_name = var.tls_secret_name

  # auth = "none": public static educational site for 95. СУ, open to the
  # internet by design — CrowdSec + ai-bot-block gate bots; no login.
  auth = "none"

  # DNS moved to stacks/valia-sites (ADR-0018 cutover): the public CNAME now
  # points at Cloudflare Pages, not the tunnel. Ingress kept only until the
  # serving stack is retired.
  dns_type = "none"
}
# stem95su moved OFF-INFRA to Cloudflare Pages (ADR-0018 cutover, 2026-07-03) —
# registry entry `stem95su` in stacks/valia-sites; runbook
# docs/runbooks/valia-sites.md. This stack intentionally declares NOTHING:
# the apply that landed this file destroyed the old in-cluster serving
# (nginx + NFS content PVC + ingress + per-site gdrive-sync CronJob +
# namespace). Directory kept only so the destroy could run through CI —
# safe to delete the dir + its PG state schema in a later cleanup.
# Harmless leftovers (manual cleanup if ever wanted): /srv/nfs/stem-site on
# the PVE host, and Vault secret/stem95su (superseded by secret/valia-sites).

View file

@ -1,9 +0,0 @@
# Name of the TLS secret for this stack. It is handed to both the
# setup_tls_secret module and the ingress_factory module.
variable "tls_secret_name" {
  description = "Name of the TLS secret passed to the setup_tls_secret and ingress_factory modules."
  type        = string
  sensitive   = true
}
# Address of the NFS server that exports the site content; forwarded to the
# nfs_volume module. The export path itself (/srv/nfs/stem-site) is set there.
variable "nfs_server" {
  description = "Address of the NFS server exporting the site content (passed to the nfs_volume module)."
  type        = string
  default     = "192.168.1.127"
}