valia-sites: registry stack for Valia's Pages sites + declarative internal DNS (ADR-0018)
Some checks failed
Build valia-sites-sync / build (push) Waiting to run
ci/woodpecker/push/default Pipeline failed

Valia keeps asking Viktor to host 1-page sites from her Drive folders;
this makes it one map entry. New stacks/valia-sites: per site a CF Pages
project + custom domain + proxied CNAME (bridge adopted via import{}),
a ConfigMap feed (valia-sites-dns) the technitium ingress-dns-sync
script now reconciles internal CNAMEs from (add/update/REMOVE — fixes
the add-only stale-record gotcha), and one shared 10-min CronJob that
mirrors each Content folder (rclone, drive.readonly, stem95su's guards)
and wrangler-deploys ONLY on manifest change (free-tier deploy cap).
Scoped CF Pages token + shared rclone conf in secret/valia-sites; the
Global API Key never enters a pod. cloudflared forgets bridge's record
via removed{} (no destroy). stem95su is in the map dns-parked
(manage_dns=false) until its cutover commit.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-07-03 12:28:06 +00:00
parent 5c42155b81
commit 8b80b4cc41
7 changed files with 478 additions and 17 deletions

360
stacks/valia-sites/main.tf Normal file
View file

@ -0,0 +1,360 @@
# Valia sites (ADR-0018): small static sites authored by Valia in Google Drive,
# served OFF-INFRA on Cloudflare Pages, mirrored by the in-cluster CronJob below
# every 10 minutes. Registering a new site = one entry in local.sites (plus
# Valia sharing the folder with vbarzin@gmail.com). Full runbook:
# docs/runbooks/valia-sites.md
#
# Per site this stack fans out:
# - cloudflare_pages_project + custom domain <name>.viktorbarzin.me
# - public proxied CNAME <name> -> <project>.pages.dev (manage_dns gate)
# - internal split-horizon CNAME via ConfigMap valia-sites-dns consumed by
# the technitium-ingress-dns-sync script (declarative: add/update/REMOVE)
# - a slot in the shared sync CronJob (rclone mirror -> wrangler deploy)
locals {
cloudflare_account_id = "02e035473cfc4834fb10c5d35470d8b4" # vbarzin@gmail.com's account (not a secret)
# THE site registry. Keys are the public subdomain (English, Viktor picks
# CONTEXT.md "Valia site"). folder_id = the Drive folder Valia shared (the
# Content folder); src_path = subfolder holding servable files ("" = root);
# entry_file = what / must serve (staged as index.html at deploy time).
# manage_dns = false parks a site's public CNAME + internal record while the
# name is still owned elsewhere (used for the stem95su ingress cutover).
sites = {
bridge = {
folder_id = "1YWwAtSTsJD9HOzckGRIFXigWqCgYSGEa" # "мост" ОбУ Отец Паисий
src_path = ""
entry_file = "index.html"
manage_dns = true
}
stem95su = {
folder_id = "1cmOI2jRyBJdnrVPgbr4kx2cx_4DY6pm_" # "claude" 95. СУ STEM board
src_path = "stem claude/files"
entry_file = "stem_board.html"
manage_dns = false # flipped true in the cutover commit (record still owned by stacks/stem95su ingress_factory)
}
}
dns_managed_sites = { for k, v in local.sites : k => v if v.manage_dns }
}
# ---------------------------------------------------------------------------
# Cloudflare Pages: project + custom domain per site
# ---------------------------------------------------------------------------
resource "cloudflare_pages_project" "site" {
for_each = local.sites
account_id = local.cloudflare_account_id
name = each.key
production_branch = "main"
}
# bridge was created by hand (wrangler) on 2026-07-03 adopt, don't recreate.
import {
to = cloudflare_pages_project.site["bridge"]
id = "02e035473cfc4834fb10c5d35470d8b4/bridge"
}
resource "cloudflare_pages_domain" "site" {
for_each = local.sites
account_id = local.cloudflare_account_id
project_name = cloudflare_pages_project.site[each.key].name
domain = "${each.key}.viktorbarzin.me"
}
import {
to = cloudflare_pages_domain.site["bridge"]
id = "02e035473cfc4834fb10c5d35470d8b4/bridge/bridge.viktorbarzin.me"
}
# Public proxied CNAME. Gated on manage_dns: a site whose name is still served
# by an in-cluster ingress keeps its ingress_factory record until cutover
# (two records can't share one name).
resource "cloudflare_record" "site" {
for_each = local.dns_managed_sites
zone_id = var.cloudflare_zone_id
name = each.key
content = cloudflare_pages_project.site[each.key].subdomain
type = "CNAME"
proxied = true
ttl = 1
}
# bridge's record predates this stack (created 2026-07-03 in stacks/cloudflared,
# handed off via removed{} there) adopt by id.
import {
to = cloudflare_record.site["bridge"]
id = "fd2c5dd4efe8fe38958944e74d0ced6d/ff4fb6f4900744d4b22de50d3fdd219b"
}
# ---------------------------------------------------------------------------
# Internal split-horizon DNS feed (docs/architecture/dns.md "superset rule"):
# the technitium-ingress-dns-sync script reads this CM and reconciles internal
# CNAMEs for every entry including deleting stale *.pages.dev records when
# an entry disappears (site retired/renamed).
# ---------------------------------------------------------------------------
resource "kubernetes_config_map" "valia_sites_dns" {
metadata {
name = "valia-sites-dns"
namespace = "technitium"
labels = { "app.kubernetes.io/managed-by" = "valia-sites" }
}
data = { for k, v in local.dns_managed_sites : k => cloudflare_pages_project.site[k].subdomain }
}
# ---------------------------------------------------------------------------
# The shared sync CronJob
# ---------------------------------------------------------------------------
resource "kubernetes_namespace" "valia_sites" {
metadata {
name = "valia-sites"
labels = {
"istio-injection" : "disabled"
tier = local.tiers.aux
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: goldilocks-vpa-auto-mode ClusterPolicy stamps this label on every namespace
ignore_changes = [metadata[0].labels["goldilocks.fairwinds.com/vpa-update-mode"]]
}
}
# Secrets: shared drive.readonly rclone conf + the SCOPED CF Pages token
# (Pages Read/Write only the Global API Key never enters a pod).
resource "kubernetes_manifest" "sync_external_secret" {
field_manager {
force_conflicts = true
}
manifest = {
apiVersion = "external-secrets.io/v1"
kind = "ExternalSecret"
metadata = {
name = "valia-sites-sync"
namespace = kubernetes_namespace.valia_sites.metadata[0].name
}
spec = {
refreshInterval = "1h"
secretStoreRef = {
name = "vault-kv"
kind = "ClusterSecretStore"
}
target = { name = "valia-sites-sync" }
data = [
{
secretKey = "rclone.conf"
remoteRef = { key = "valia-sites", property = "rclone_conf" }
},
{
secretKey = "CLOUDFLARE_API_TOKEN"
remoteRef = { key = "valia-sites", property = "cloudflare_pages_token" }
},
{
secretKey = "CLOUDFLARE_ACCOUNT_ID"
remoteRef = { key = "valia-sites", property = "account_id" }
},
]
}
}
depends_on = [kubernetes_namespace.valia_sites]
}
# Site registry rendered for the job (folder ids aren't secrets).
resource "kubernetes_config_map" "sync_config" {
metadata {
name = "valia-sites-config"
namespace = kubernetes_namespace.valia_sites.metadata[0].name
}
data = {
"sites.json" = jsonencode(local.sites)
}
}
# Last-deployed manifest hash per site written by the job (merge-patch), so
# TF must never fight it over data.
resource "kubernetes_config_map" "sync_state" {
metadata {
name = "valia-sites-state"
namespace = kubernetes_namespace.valia_sites.metadata[0].name
}
data = {}
lifecycle {
ignore_changes = [data]
}
}
resource "kubernetes_service_account" "sync" {
metadata {
name = "valia-sites-sync"
namespace = kubernetes_namespace.valia_sites.metadata[0].name
}
}
resource "kubernetes_role" "sync_state" {
metadata {
name = "valia-sites-sync-state"
namespace = kubernetes_namespace.valia_sites.metadata[0].name
}
rule {
api_groups = [""]
resources = ["configmaps"]
resource_names = ["valia-sites-state"]
verbs = ["get", "patch"]
}
}
resource "kubernetes_role_binding" "sync_state" {
metadata {
name = "valia-sites-sync-state"
namespace = kubernetes_namespace.valia_sites.metadata[0].name
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = kubernetes_role.sync_state.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = kubernetes_service_account.sync.metadata[0].name
namespace = kubernetes_namespace.valia_sites.metadata[0].name
}
}
resource "kubernetes_cron_job_v1" "sync" {
metadata {
name = "valia-sites-sync"
namespace = kubernetes_namespace.valia_sites.metadata[0].name
labels = { app = "valia-sites", component = "sync" }
}
spec {
schedule = "*/10 * * * *"
concurrency_policy = "Forbid"
successful_jobs_history_limit = 2
failed_jobs_history_limit = 3
job_template {
metadata {}
spec {
backoff_limit = 1
ttl_seconds_after_finished = 86400
template {
metadata { labels = { app = "valia-sites", component = "sync" } }
spec {
restart_policy = "OnFailure"
service_account_name = kubernetes_service_account.sync.metadata[0].name
container {
name = "sync"
image = "ghcr.io/viktorbarzin/valia-sites-sync:latest"
# Guards mirror stem95su's proven set: hard-fail on Drive
# list/auth errors (visible as a failed Job the chosen
# visibility, ADR-0018), skip quietly when a folder is empty or
# missing its entry file (never wipe a live site), capped
# deletes. Deploy ONLY on remote-manifest change: CF Pages caps
# monthly deployments on the free tier, so 144 no-op
# deploys/day is not an option.
command = ["/bin/sh", "-c", <<-EOT
set -u
cp /config/rclone.conf /tmp/rc.conf
APISERVER="https://kubernetes.default.svc"
SA=/var/run/secrets/kubernetes.io/serviceaccount
KTOKEN=$$(cat $$SA/token); NS=$$(cat $$SA/namespace)
STATE_URL="$$APISERVER/api/v1/namespaces/$$NS/configmaps/valia-sites-state"
FAILED=0
for SITE in $$(jq -r 'keys[]' /sites/sites.json); do
FOLDER=$$(jq -r --arg s "$$SITE" '.[$$s].folder_id' /sites/sites.json)
SRC_PATH=$$(jq -r --arg s "$$SITE" '.[$$s].src_path' /sites/sites.json)
ENTRY=$$(jq -r --arg s "$$SITE" '.[$$s].entry_file' /sites/sites.json)
RC="rclone --config /tmp/rc.conf --drive-root-folder-id=$$FOLDER --drive-skip-gdocs"
# 1. Remote manifest (path+size+hash) metadata only, no download.
MANIFEST=$$($$RC lsf "gdrive:$$SRC_PATH" -R --files-only --format phs 2>/tmp/lsf.err) || {
echo "FATAL [$$SITE]: Drive list failed (auth/network):"; cat /tmp/lsf.err; FAILED=1; continue; }
N=$$(printf '%s\n' "$$MANIFEST" | grep -c . || true)
if [ "$$N" -lt 1 ] || ! printf '%s\n' "$$MANIFEST" | cut -d';' -f1 | grep -qx "$$ENTRY"; then
echo "GUARD [$$SITE]: N=$$N / $$ENTRY missing -- skipping, site untouched"; continue
fi
HASH=$$(printf '%s' "$$MANIFEST" | sha256sum | cut -d' ' -f1)
LAST=$$(curl -sf --cacert $$SA/ca.crt -H "Authorization: Bearer $$KTOKEN" "$$STATE_URL" | jq -r --arg s "$$SITE" '.data[$$s] // ""')
if [ "$$HASH" = "$$LAST" ]; then echo "OK [$$SITE]: unchanged"; continue; fi
# 2. Content changed pull and deploy.
$$RC sync "gdrive:$$SRC_PATH" "/work/$$SITE" --exclude ".DS_Store" --fast-list --transfers 4 --max-delete 25 -v || {
echo "FATAL [$$SITE]: rclone sync failed"; FAILED=1; continue; }
if [ "$$ENTRY" != "index.html" ]; then
cp "/work/$$SITE/$$ENTRY" "/work/$$SITE/index.html"
fi
wrangler pages deploy "/work/$$SITE" --project-name="$$SITE" --branch=main --commit-dirty=true || {
echo "FATAL [$$SITE]: wrangler deploy failed"; FAILED=1; continue; }
curl -sf --cacert $$SA/ca.crt -H "Authorization: Bearer $$KTOKEN" \
-X PATCH -H "Content-Type: application/merge-patch+json" \
-d "{\"data\":{\"$$SITE\":\"$$HASH\"}}" "$$STATE_URL" > /dev/null || {
echo "WARN [$$SITE]: state patch failed (will redeploy next run)"; FAILED=1; }
echo "DEPLOYED [$$SITE]: $$HASH"
done
exit $$FAILED
EOT
]
env {
name = "CLOUDFLARE_API_TOKEN"
value_from {
secret_key_ref {
name = "valia-sites-sync"
key = "CLOUDFLARE_API_TOKEN"
}
}
}
env {
name = "CLOUDFLARE_ACCOUNT_ID"
value_from {
secret_key_ref {
name = "valia-sites-sync"
key = "CLOUDFLARE_ACCOUNT_ID"
}
}
}
resources {
requests = { cpu = "25m", memory = "128Mi" }
limits = { memory = "512Mi" }
}
volume_mount {
name = "rclone-config"
mount_path = "/config"
read_only = true
}
volume_mount {
name = "sites-config"
mount_path = "/sites"
read_only = true
}
volume_mount {
name = "work"
mount_path = "/work"
}
}
volume {
name = "rclone-config"
secret {
secret_name = "valia-sites-sync"
items {
key = "rclone.conf"
path = "rclone.conf"
}
}
}
volume {
name = "sites-config"
config_map { name = kubernetes_config_map.sync_config.metadata[0].name }
}
volume {
name = "work"
empty_dir {}
}
}
}
}
}
}
lifecycle {
# KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
}
depends_on = [kubernetes_manifest.sync_external_secret]
}

View file

@ -0,0 +1,15 @@
# valia-sites-sync: everything the 10-min Content-folder mirror needs, baked in
# (no runtime installs — CronJob pods must not apk/npm on every start).
# rclone pinned to match the proven stem95su version; wrangler pinned to major 4.
FROM node:22-alpine
RUN apk add --no-cache curl unzip ca-certificates jq \
&& curl -fsSL https://downloads.rclone.org/v1.74.3/rclone-v1.74.3-linux-amd64.zip -o /tmp/rclone.zip \
&& unzip -j /tmp/rclone.zip '*/rclone' -d /usr/local/bin \
&& chmod +x /usr/local/bin/rclone \
&& rm /tmp/rclone.zip \
&& npm install -g wrangler@4 \
&& npm cache clean --force
# wrangler writes config/cache under $HOME; the CronJob runs as non-root node (uid 1000)
ENV HOME=/tmp

View file

@ -0,0 +1,8 @@
include "root" {
path = find_in_parent_folders()
}
dependency "platform" {
config_path = "../platform"
skip_outputs = true
}

View file

@ -0,0 +1,3 @@
variable "cloudflare_zone_id" {
type = string
}