From f8e8f31306c04b245fc5249e2168b453b81056b8 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 9 Jun 2026 08:16:08 +0000 Subject: [PATCH] stem95su: scheduled Drive->site sync CronJob (every 15m) Reverses the earlier on-demand-only call now that the content is actively maintained. New stacks/stem95su/gdrive-sync.tf: - CronJob stem95su-gdrive-sync (*/15) mounts the content PVC RW and `rclone sync`s the read-only Drive folder "claude" (stem claude/files) onto it. In-cluster, so it mirrors straight to NFS (no rsync/ssh hop). rclone/rclone:1.74.3; scope=drive.readonly; empty-source guard + --max-delete 25; .DS_Store excluded. A dead token surfaces as a failed Job. - ESO ExternalSecret stem95su-rclone <- Vault secret/stem95su (rclone_conf). Requires the GCP OAuth app published to Production (else the refresh token expires ~weekly); re-mint + update secret/stem95su after publishing. Verified: manual job ran guard OK + rclone "nothing to transfer", site 200. Co-Authored-By: Claude Opus 4.8 --- .claude/reference/service-catalog.md | 2 +- stacks/stem95su/gdrive-sync.tf | 119 +++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 stacks/stem95su/gdrive-sync.tf diff --git a/.claude/reference/service-catalog.md b/.claude/reference/service-catalog.md index 633b227f..09b8a8e9 100644 --- a/.claude/reference/service-catalog.md +++ b/.claude/reference/service-catalog.md @@ -116,7 +116,7 @@ | status-page | Status page | status-page | | plotting-book | Book plotting/world-building app | plotting-book | | tripit | Self-hosted TripIt-clone travel-itinerary PWA (FastAPI + SvelteKit SPA, same-origin). CNPG (`tripit` db, Vault static role `pg-tripit`) + RWX NFS trip-doc vault (`/srv/nfs/tripit-documents`) + RWO `proxmox-lvm-encrypted` personal-document vault `tripit-personal-documents` (passports/IDs — AES-256-GCM app-layer envelope, master key `DOCUMENT_ENCRYPTION_KEY` in `secret/tripit`). 
`auth=required` (Authentik forward-auth, reads `X-authentik-email`); second `auth=none` ingress on `/api/calendar` for HMAC-token-gated `.ics` feed. Email-ingest CronJob `tripit-ingest-plans` (`*/15`) is the SOLE inbound path — forward a booking to plans@viktorbarzin.me (catch-all → spam@), polled read-only and routed ONLY to a registered user / verified linked address (no default-owner fallback; strangers ignored), parsed by local LLM (`qwen3vl-4b`), and the sender is emailed the outcome (Added to trip / Couldn't import). Plus `tripit-poll-flights`, `tripit-run-reminders`, `tripit-transport-nudge`, `tripit-weather-brief`. (The old Gmail-scrape `tripit-ingest-mail` CronJob was removed 2026-06-05.) App secrets in Vault `secret/tripit`. | tripit | -| stem95su | STEM educational platform for **95. СУ „Проф. Иван Шишманов"** (Sofia school) at stem95su.viktorbarzin.me. Public **open** static site (`auth=none` — CrowdSec + ai-bot-block, no login). Stock `nginx:1.28-alpine` serving content **straight off PVE host NFS** `/srv/nfs/stem-site` (RWX `nfs_volume`, mounted read-only) — **NOT** image-baked, so the externally-authored (Gemini-exported) HTML/media updates with no rebuild; auto-backed-up offsite by `nfs-mirror`. **Content source = Google Drive folder "claude"** (id `1cmOI2jRyBJdnrVPgbr4kx2cx_4DY6pm_`, shared Valentina→vbarzin@gmail.com). **Deploy is ON-DEMAND, no scheduled job** (deliberate — short-term content, avoid rotting artifacts): mirror Drive→NFS via a throwaway `rclone/rclone` container using the existing `google_workspace` OAuth creds in Vault `secret/viktor` (`google_workspace_mcp_token_json`) → rsync to `/srv/nfs/stem-site` (empty-source guard). Just ask Claude to "sync stem95su from Drive" (recipe in claude-memory). Nextcloud "PVE NFS Pool"/rsync still works as a manual fallback. Dashboard `stem_board.html` served at `/` via a small nginx ConfigMap (`index`). No DB, no in-cluster secrets. 
Reference impl for the NFS-backed static-site pattern (see patterns.md). | stem95su | +| stem95su | STEM educational platform for **95. СУ „Проф. Иван Шишманов"** (Sofia school) at stem95su.viktorbarzin.me. Public **open** static site (`auth=none` — CrowdSec + ai-bot-block, no login). Stock `nginx:1.28-alpine` serving content **straight off PVE host NFS** `/srv/nfs/stem-site` (RWX `nfs_volume`, mounted read-only) — **NOT** image-baked, so the externally-authored (Gemini-exported) HTML/media updates with no rebuild; auto-backed-up offsite by `nfs-mirror`. **Content source = Google Drive folder "claude"** (id `1cmOI2jRyBJdnrVPgbr4kx2cx_4DY6pm_`, shared Valentina→vbarzin@gmail.com). **Deploy = scheduled mirror** (since 2026-06-09, reversed the earlier on-demand-only call once content went active): CronJob `stem95su-gdrive-sync` (`*/15`, `stacks/stem95su/gdrive-sync.tf`) mounts the content PVC RW and `rclone sync`s the Drive folder onto it (`docker.io/rclone/rclone:1.74.3`, `scope=drive.readonly` — Drive is READ-ONLY; empty-source guard + `--max-delete 25` so a partial listing can't wipe the site). rclone creds (OAuth refresh-token) in Vault `secret/stem95su` (`rclone_conf`) → ESO secret `stem95su-rclone`. **Requires the GCP OAuth app (project home-lab-1700868541205) published to "Production"** or the refresh token expires ~weekly (re-mint + `vault kv put secret/stem95su rclone_conf=…` after publishing); a dead token surfaces as a failed Job. Manual on-demand sync still possible (throwaway rclone container from devvm; recipe in claude-memory). Nextcloud "PVE NFS Pool"/rsync is a manual fallback. Dashboard `stem_board.html` served at `/` via a small nginx ConfigMap (`index`). No DB; the only in-cluster secret is the ESO-synced `stem95su-rclone` (rclone config for the sync CronJob). Reference impl for the NFS-backed static-site pattern (see patterns.md). | stem95su | | trek | **TRIAL (2026-06-05)** — self-hosted group-trip planner (upstream [TREK](https://github.com/mauriceboe/TREK), `mauriceboe/trek:3.0.22`, AGPL-3.0). 
Solo evaluation behind Authentik forward-auth (`auth=required`) before deciding build-vs-adopt; covers collaborative trip planning + accommodation records + activities + per-person budget splitting on free OpenStreetMap (no paid maps key). SQLite + uploads on `proxmox-lvm-encrypted` (`trek-data-encrypted` 2Gi, `trek-uploads-encrypted` 5Gi). For the trial only: `ENCRYPTION_KEY` is TREK-auto-generated onto the data PVC and the bootstrap admin (`admin@trek.local`) is printed to pod logs — NO Vault/ESO wiring (graduation TODO: move key to `secret/trek` + ESO, add an app-level SQLite backup CronJob since host file-backup can't read the LUKS PVC, wire TREK↔Authentik OIDC). Pinned image, TF-managed (no CI/Keel). Availability-poll companion (Rallly) deferred. Teardown: `tg destroy` in `stacks/trek`. | trek | ## Cloudflare Domains diff --git a/stacks/stem95su/gdrive-sync.tf b/stacks/stem95su/gdrive-sync.tf new file mode 100644 index 00000000..59cf065d --- /dev/null +++ b/stacks/stem95su/gdrive-sync.tf @@ -0,0 +1,119 @@ +# Automatic Google Drive -> site sync (added 2026-06-09; supersedes the +# earlier on-demand-only model now that content is actively maintained). +# +# A CronJob mirrors the READ-ONLY Drive folder "claude" (servable content in +# subfolder "stem claude/files/") onto the NFS content volume every 15 min via +# rclone. rclone is delta-aware: an unchanged run lists ~33 files' metadata and +# transfers nothing, so the schedule is cheap (not a 24MB re-download). nginx +# keeps serving the same volume read-only; updates appear within ~5s (actimeo). +# +# Drive is treated strictly READ-ONLY: scope=drive.readonly and rclone only ever +# reads the remote (sync gdrive: -> /data), never writes back. +# +# TOKEN LONGEVITY: the GCP OAuth app (project home-lab-1700868541205) MUST be +# published to "Production" or its refresh token expires ~weekly and this job +# fails. After publishing, re-mint the token and refresh +# `secret/stem95su.rclone_conf`. 
A failed run surfaces as a failed Job.
+
+resource "kubernetes_manifest" "rclone_external_secret" { # ESO: copies the rclone config out of Vault into the Secret the CronJob mounts
+  manifest = {
+    apiVersion = "external-secrets.io/v1beta1"
+    kind       = "ExternalSecret"
+    metadata = {
+      name      = "stem95su-rclone"
+      namespace = kubernetes_namespace.stem95su.metadata[0].name
+    }
+    spec = {
+      refreshInterval = "1h" # ESO re-syncs from the store hourly
+      secretStoreRef = {
+        name = "vault-kv"
+        kind = "ClusterSecretStore"
+      }
+      target = { name = "stem95su-rclone" } # name of the resulting k8s Secret (mounted by the CronJob below)
+      data = [{
+        secretKey = "rclone.conf" # Secret key == file name under the /config mount
+        remoteRef = {
+          key      = "stem95su"
+          property = "rclone_conf"
+        }
+      }]
+    }
+  }
+  depends_on = [kubernetes_namespace.stem95su]
+}
+
+resource "kubernetes_cron_job_v1" "gdrive_sync" { # every 15 min: rclone-mirror the Drive folder onto the content PVC
+  metadata {
+    name      = "stem95su-gdrive-sync"
+    namespace = kubernetes_namespace.stem95su.metadata[0].name
+    labels    = { run = "stem95su", component = "gdrive-sync" }
+  }
+  spec {
+    schedule                      = "*/15 * * * *"
+    concurrency_policy            = "Forbid" # never run two syncs against the same volume at once
+    successful_jobs_history_limit = 2
+    failed_jobs_history_limit     = 3
+    job_template {
+      metadata {}
+      spec {
+        backoff_limit              = 1 # one retry, then the Job is marked failed
+        ttl_seconds_after_finished = 86400
+        template {
+          metadata { labels = { run = "stem95su", component = "gdrive-sync" } }
+          spec {
+            restart_policy = "Never" # not OnFailure: that pod is terminated once backoff_limit is hit, taking the FATAL log with it
+            container {
+              name  = "rclone"
+              image = "docker.io/rclone/rclone:1.74.3"
+              # Mirror Drive folder -> /data. Guard: hard-fail on auth/list error
+              # (so an expired token is visible); skip quietly if the source is
+              # empty / missing the dashboard (exact-name match; never wipe the live site);
+              # --max-delete caps catastrophic deletes from a partial listing.
+              command = ["/bin/sh", "-c", <<-EOT
+                set -eu
+                cp /config/rclone.conf /tmp/rc.conf
+                SRC="gdrive:stem claude/files"
+                LIST=$(rclone --config /tmp/rc.conf lsf "$SRC" --files-only) || { echo "FATAL: Drive list failed (auth/network)"; exit 1; }
+                N=$(printf '%s\n' "$LIST" | grep -c . || true)
+                if [ "$N" -lt 1 ] || ! printf '%s\n' "$LIST" | grep -qxF "stem_board.html"; then
+                  echo "GUARD: source N=$N / stem_board.html missing -- skipping, site untouched"; exit 0
+                fi
+                echo "source OK ($N files) -- mirroring to /data"
+                rclone --config /tmp/rc.conf sync "$SRC" /data --exclude ".DS_Store" --fast-list --transfers 4 --max-delete 25 -v
+              EOT
+              ]
+              resources {
+                requests = { cpu = "10m", memory = "64Mi" }
+                limits   = { memory = "192Mi" }
+              }
+              volume_mount {
+                name       = "rclone-config"
+                mount_path = "/config"
+                read_only  = true # script works on a copy in /tmp; presumably so rclone can rewrite refreshed tokens -- confirm
+              }
+              volume_mount {
+                name       = "content"
+                mount_path = "/data" # sync target; mounted read-write here
+              }
+            }
+            volume {
+              name = "rclone-config"
+              secret { secret_name = "stem95su-rclone" }
+            }
+            volume {
+              name = "content"
+              persistent_volume_claim {
+                claim_name = module.nfs_content.claim_name
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  lifecycle {
+    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
+    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
+  }
+  depends_on = [kubernetes_manifest.rclone_external_secret] # the Secret source must exist before the first scheduled run
+}