[forgejo] Phase 0 of registry consolidation: prepare Forgejo OCI registry

Stage 1 of moving private images off the registry:2 container at
registry.viktorbarzin.me:5050 (which has hit distribution#3324 corruption
3x in 3 weeks) onto Forgejo's built-in OCI registry. No cutover risk —
pods still pull from the existing registry until Phase 3.

What changes:
* Forgejo deployment: memory 384Mi→1Gi, PVC 5Gi→15Gi (cap 50Gi).
  Explicit FORGEJO__packages__ENABLED + CHUNKED_UPLOAD_PATH (defensive,
  v11 default-on).
* ingress_factory: max_body_size variable was declared but never wired
  in after the nginx→Traefik migration. Now creates a per-ingress
  Buffering middleware when set; default null = no limit (preserves
  existing behavior). Forgejo ingress sets max_body_size=5g to allow
  multi-GB layer pushes.
* Cluster-wide registry-credentials Secret: 4th auths entry for
  forgejo.viktorbarzin.me, populated from Vault secret/viktor/
  forgejo_pull_token (cluster-puller PAT, read:package). Existing
  Kyverno ClusterPolicy syncs cluster-wide — no policy edits.
* Containerd hosts.toml redirect: forgejo.viktorbarzin.me → in-cluster
  Traefik LB 10.0.20.200 (avoids hairpin NAT for in-cluster pulls).
  Cloud-init for new VMs + scripts/setup-forgejo-containerd-mirror.sh
  for existing nodes.
* Forgejo retention CronJob (0 4 * * *): keeps newest 10 versions per
  package + always :latest. First 7 days dry-run (DRY_RUN=true);
  flip the local in cleanup.tf after log review.
* Forgejo integrity probe CronJob (*/15): same algorithm as the
  existing registry-integrity-probe. Existing Prometheus alerts
  (RegistryManifestIntegrityFailure et al) made instance-aware so
  they cover both registries during the bake.
* Docs: design+plan in docs/plans/, setup runbook in docs/runbooks/.

Operational note — the apply order is non-trivial because the new
Vault keys (forgejo_pull_token, forgejo_cleanup_token,
secret/ci/global/forgejo_*) must exist BEFORE terragrunt apply in the
kyverno + monitoring + forgejo stacks. The setup runbook documents
the bootstrap sequence.

Phase 1 (per-project dual-push pipelines) follows in subsequent
commits. Bake clock starts when the last project goes dual-push.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-07 15:51:34 +00:00
parent b1c21f78b9
commit 5d22b449f9
13 changed files with 1072 additions and 10 deletions

120
stacks/forgejo/cleanup.tf Normal file
View file

@ -0,0 +1,120 @@
# Forgejo container-package retention CronJob.
#
# Forgejo's per-package "Cleanup Rules" UI is not exposed via Terraform:
# it's per-user runtime state inside the Forgejo DB. Driving retention from
# a CronJob hitting the public API keeps the policy versioned in this repo.
#
# Auth: a write:package PAT belonging to ci-pusher (same user that pushes
# from CI). DELETE on packages requires write:package scope. PAT lives in
# Vault at secret/viktor/forgejo_cleanup_token.
# Reads the Vault KV v2 secret named "viktor" on the "secret" mount; the
# cleanup token Secret in this file takes its value from this data source.
data "vault_kv_secret_v2" "forgejo_viktor" {
  name  = "viktor"
  mount = "secret"
}
locals {
  # Dry-run switch for the retention job. Leave true for the first 7 days,
  # then flip to false once the dry-run logs look correct.
  forgejo_cleanup_dry_run = true
}
# Ships files/cleanup.sh (read from this module's directory) into the Forgejo
# namespace as a ConfigMap under the key "cleanup.sh".
resource "kubernetes_config_map" "forgejo_cleanup_script" {
  metadata {
    namespace = kubernetes_namespace.forgejo.metadata[0].name
    name      = "forgejo-cleanup-script"
  }

  data = {
    "cleanup.sh" = file("${path.module}/files/cleanup.sh")
  }
}
# Opaque Secret carrying the cleanup PAT, copied from the
# "forgejo_cleanup_token" key of the Vault secret read above.
resource "kubernetes_secret" "forgejo_cleanup_token" {
  type = "Opaque"

  metadata {
    namespace = kubernetes_namespace.forgejo.metadata[0].name
    name      = "forgejo-cleanup-token"
  }

  data = {
    FORGEJO_TOKEN = data.vault_kv_secret_v2.forgejo_viktor.data["forgejo_cleanup_token"]
  }
}
# Daily retention CronJob: runs /scripts/cleanup.sh (mounted from the
# forgejo-cleanup-script ConfigMap) in an Alpine container at 04:00.
resource "kubernetes_cron_job_v1" "forgejo_cleanup" {
  metadata {
    namespace = kubernetes_namespace.forgejo.metadata[0].name
    name      = "forgejo-cleanup"
  }

  spec {
    schedule = "0 4 * * *"
    # Never start a new run while the previous one is still going.
    concurrency_policy            = "Forbid"
    successful_jobs_history_limit = 3
    failed_jobs_history_limit     = 3

    job_template {
      metadata {}

      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 3600

        template {
          metadata {}

          spec {
            restart_policy = "OnFailure"

            # The script is delivered as a ConfigMap volume, mode 0755.
            volume {
              name = "scripts"
              config_map {
                name         = kubernetes_config_map.forgejo_cleanup_script.metadata[0].name
                default_mode = "0755"
              }
            }

            container {
              name    = "cleanup"
              image   = "docker.io/library/alpine:3.20"
              command = ["/bin/sh", "/scripts/cleanup.sh"]

              # PAT comes from the Secret, never inlined in the pod spec.
              env {
                name = "FORGEJO_TOKEN"
                value_from {
                  secret_key_ref {
                    name = kubernetes_secret.forgejo_cleanup_token.metadata[0].name
                    key  = "FORGEJO_TOKEN"
                  }
                }
              }

              env {
                name  = "FORGEJO_HOST"
                value = "http://forgejo.forgejo.svc.cluster.local"
              }

              env {
                name  = "FORGEJO_OWNER"
                value = "viktor"
              }

              env {
                name  = "KEEP_LAST_N"
                value = "10"
              }

              # Driven by local.forgejo_cleanup_dry_run; rendered as a string.
              env {
                name  = "DRY_RUN"
                value = local.forgejo_cleanup_dry_run ? "true" : "false"
              }

              volume_mount {
                name       = "scripts"
                mount_path = "/scripts"
              }

              resources {
                requests = {
                  cpu    = "10m"
                  memory = "32Mi"
                }
                limits = {
                  memory = "96Mi"
                }
              }
            }
          }
        }
      }
    }
  }

  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}

View file

@ -0,0 +1,109 @@
#!/bin/sh
# Forgejo container-package retention.
#
# For each container package owned by ${FORGEJO_OWNER}, keep the newest
# ${KEEP_LAST_N} versions (by created_at) and always keep the version named
# "latest". Everything else is deleted via
# DELETE /api/v1/packages/{owner}/container/{name}/{version}.
#
# DRY_RUN=true logs what would be deleted but issues no DELETE calls.
#
# Required env:
#   FORGEJO_HOST   e.g. http://forgejo.forgejo.svc.cluster.local
#   FORGEJO_OWNER  e.g. viktor
#   FORGEJO_TOKEN  PAT, sent as "Authorization: token <PAT>"
# Optional env:
#   KEEP_LAST_N    non-negative integer (default 10)
#   DRY_RUN        true|false (default true)
#
# Exit status: 0 on success; 2 on invalid KEEP_LAST_N; non-zero if listing
# packages fails or if any DELETE call fails.
#
# NOTE(review): if the package list also contains untagged digest versions
# (sha256:...), they count toward KEEP_LAST_N and may be deleted while a kept
# tag still references them. Confirm against the dry-run logs before
# disabling DRY_RUN.
set -eu

apk add --no-cache curl jq >/dev/null

OWNER="${FORGEJO_OWNER}"
KEEP="${KEEP_LAST_N:-10}"
DRY="${DRY_RUN:-true}"
BASE="${FORGEJO_HOST%/}/api/v1"
AUTH_HEADER="Authorization: token $FORGEJO_TOKEN"

# Reject anything that is not a plain non-negative integer up front, rather
# than failing later inside jq with an opaque message.
case "$KEEP" in
  '' | *[!0-9]*)
    echo "KEEP_LAST_N must be a non-negative integer, got '$KEEP'" >&2
    exit 2
    ;;
esac

# Percent-encode one URL path segment. Package names may contain '/' and
# versions may contain ':'; both must be escaped to stay a single segment.
urlencode() {
  jq -rn --arg v "$1" '$v | @uri'
}

echo "Forgejo cleanup: owner=$OWNER keep_last=$KEEP dry_run=$DRY"
echo "API base: $BASE"

# Scratch directory, removed on any exit.
WORK=$(mktemp -d)
trap 'rm -rf "$WORK"' EXIT
ALL="$WORK/all.json"
echo "[]" > "$ALL"

# Page through ALL container packages. curl -f plus set -e aborts the run on
# any HTTP/transport error, so a partial listing is never acted upon.
PAGE=1
while :; do
  curl -sf --max-time 60 -H "$AUTH_HEADER" -o "$WORK/page.json" \
    "$BASE/packages/$OWNER?type=container&limit=50&page=$PAGE"
  if ! jq -e 'type == "array"' "$WORK/page.json" >/dev/null 2>&1; then
    echo "ERROR: page $PAGE of the package list is not a JSON array" >&2
    exit 1
  fi
  COUNT=$(jq 'length' "$WORK/page.json")
  if [ "$COUNT" = "0" ]; then break; fi
  jq -s '.[0] + .[1]' "$ALL" "$WORK/page.json" > "$WORK/merged.json"
  mv "$WORK/merged.json" "$ALL"
  PAGE=$((PAGE + 1))
  # Safety: never run away.
  if [ "$PAGE" -gt 100 ]; then
    echo "WARNING: stopped paging after 100 pages; listing may be incomplete" >&2
    break
  fi
done

TOTAL=$(jq 'length' "$ALL")
echo "Found $TOTAL package version(s)."
if [ "$TOTAL" = "0" ]; then
  echo "Nothing to do."
  exit 0
fi

# Group by name and process each group. Scratch files use fixed names (not
# the package name) so names containing '/' cannot break the paths.
NAMES=$(jq -r '.[].name' "$ALL" | sort -u)
VERSIONS="$WORK/versions.json"
KEEP_FILE="$WORK/keep.txt"
DELETE_FILE="$WORK/delete.txt"
DEL=0
KEPT=0
FAILED=0

for NAME in $NAMES; do
  # All versions of this name, sorted by created_at descending.
  jq --arg n "$NAME" '
    [.[] | select(.name == $n)]
    | sort_by(.created_at) | reverse
  ' "$ALL" > "$VERSIONS"
  N_VERSIONS=$(jq 'length' "$VERSIONS")
  echo "[$NAME] $N_VERSIONS version(s)"

  # Build the keep set: top $KEEP + anything tagged 'latest'.
  jq -r --argjson keep "$KEEP" '
    [.[0:$keep][].version] + [.[] | select(.version == "latest") | .version]
    | unique
    | .[]
  ' "$VERSIONS" > "$KEEP_FILE"

  # Build the delete set. With an empty keep set everything is deletable;
  # handle that explicitly instead of relying on grep's empty-pattern-file
  # behaviour.
  if [ -s "$KEEP_FILE" ]; then
    jq -r '.[].version' "$VERSIONS" \
      | grep -vxFf "$KEEP_FILE" > "$DELETE_FILE" || true
  else
    jq -r '.[].version' "$VERSIONS" > "$DELETE_FILE"
  fi

  D_COUNT=$(wc -l < "$DELETE_FILE" | tr -d ' ')
  K_COUNT=$(wc -l < "$KEEP_FILE" | tr -d ' ')
  echo " keep=$K_COUNT delete=$D_COUNT"
  KEPT=$((KEPT + K_COUNT))

  NAME_ENC=$(urlencode "$NAME")
  while IFS= read -r VER; do
    [ -z "$VER" ] && continue
    URL="$BASE/packages/$OWNER/container/$NAME_ENC/$(urlencode "$VER")"
    if [ "$DRY" = "true" ]; then
      echo " DRY_RUN would DELETE $URL"
    else
      # -w already prints 000 on transport failure; '|| true' only stops
      # set -e from aborting the whole run on one bad request.
      HTTP=$(curl -s --max-time 60 -o /dev/null -w '%{http_code}' \
        -X DELETE -H "$AUTH_HEADER" "$URL") || true
      HTTP="${HTTP:-000}"
      if [ "$HTTP" = "204" ] || [ "$HTTP" = "200" ]; then
        echo " deleted $NAME:$VER"
      else
        echo " FAIL $NAME:$VER HTTP $HTTP"
        FAILED=$((FAILED + 1))
      fi
    fi
    DEL=$((DEL + 1))
  done < "$DELETE_FILE"
done

echo "Summary: kept=$KEPT to_delete=$DEL dry_run=$DRY failed=$FAILED"

# Surface failed deletes as a failed Job instead of a silent success.
if [ "$FAILED" -gt 0 ]; then
  exit 1
fi

View file

@ -32,7 +32,7 @@ resource "kubernetes_persistent_volume_claim" "data_encrypted" {
annotations = {
"resize.topolvm.io/threshold" = "80%"
"resize.topolvm.io/increase" = "50%"
"resize.topolvm.io/storage_limit" = "20Gi"
"resize.topolvm.io/storage_limit" = "50Gi"
}
}
spec {
@ -40,7 +40,7 @@ resource "kubernetes_persistent_volume_claim" "data_encrypted" {
storage_class_name = "proxmox-lvm-encrypted"
resources {
requests = {
storage = "5Gi"
storage = "15Gi"
}
}
}
@ -106,6 +106,18 @@ resource "kubernetes_deployment" "forgejo" {
name = "FORGEJO__webhook__ALLOWED_HOST_LIST"
value = "*.svc.cluster.local"
}
# OCI registry (container packages). Default-on in Forgejo v11 but
# explicit so it can't be silently disabled by an upstream config
# change. Chunked-upload path needs a directory inside /data so it
# survives pod restarts and shares the same PVC as the registry blobs.
env {
name = "FORGEJO__packages__ENABLED"
value = "true"
}
env {
name = "FORGEJO__packages__CHUNKED_UPLOAD_PATH"
value = "/data/tmp/package-upload"
}
volume_mount {
name = "data"
mount_path = "/data"
@ -113,10 +125,10 @@ resource "kubernetes_deployment" "forgejo" {
resources {
requests = {
cpu = "15m"
memory = "384Mi"
memory = "1Gi"
}
limits = {
memory = "384Mi"
memory = "1Gi"
}
}
port {
@ -165,6 +177,9 @@ module "ingress" {
namespace = kubernetes_namespace.forgejo.metadata[0].name
name = "forgejo"
tls_secret_name = var.tls_secret_name
# OCI registry pushes ship full image layer blobs in one request; default
# Traefik buffering chokes on anything past a few hundred MB.
max_body_size = "5g"
extra_annotations = {
"gethomepage.dev/enabled" = "true"
"gethomepage.dev/name" = "Forgejo"

View file

@ -83,6 +83,12 @@ module "k8s-node-template" {
mkdir -p /etc/containerd/certs.d/registry.viktorbarzin.me
printf 'server = "https://registry.viktorbarzin.me"\n\n[host."https://10.0.20.10:5050"]\n capabilities = ["pull", "resolve", "push"]\n skip_verify = true\n' > /etc/containerd/certs.d/registry.viktorbarzin.me/hosts.toml
# Forgejo OCI registry: redirect to in-cluster Traefik LB (10.0.20.200) so
# pulls don't hairpin out through the WAN gateway. Traefik serves the
# *.viktorbarzin.me wildcard so SNI verification still passes.
mkdir -p /etc/containerd/certs.d/forgejo.viktorbarzin.me
printf 'server = "https://forgejo.viktorbarzin.me"\n\n[host."https://10.0.20.200"]\n capabilities = ["pull", "resolve"]\n' > /etc/containerd/certs.d/forgejo.viktorbarzin.me/hosts.toml
# Low-traffic registries (registry.k8s.io, quay.io, reg.kyverno.io) pull directly.
# Pull-through cache removed: caused corrupted images (truncated downloads)
# breaking VPA certgen and Kyverno image pulls.

View file

@ -29,6 +29,12 @@ resource "kubernetes_secret" "registry_credentials" {
"10.0.20.10:5050" = {
auth = base64encode("${data.vault_kv_secret_v2.viktor.data["registry_user"]}:${data.vault_kv_secret_v2.viktor.data["registry_password"]}")
}
# Forgejo OCI registry read-only PAT for the cluster-puller service
# account user. Pushes go through ci-pusher (separate PAT in Vault
# secret/ci/global, surfaced to Woodpecker).
"forgejo.viktorbarzin.me" = {
auth = base64encode("cluster-puller:${data.vault_kv_secret_v2.viktor.data["forgejo_pull_token"]}")
}
}
})
}

View file

@ -33,5 +33,6 @@ module "monitoring" {
kube_config_path = var.kube_config_path
registry_user = data.vault_kv_secret_v2.viktor.data["registry_user"]
registry_password = data.vault_kv_secret_v2.viktor.data["registry_password"]
forgejo_pull_token = data.vault_kv_secret_v2.viktor.data["forgejo_pull_token"]
tier = local.tiers.cluster
}

View file

@ -41,6 +41,11 @@ variable "registry_password" {
type = string
sensitive = true
}
# Sensitive input: read-scope PAT handed to the Forgejo integrity probe.
variable "forgejo_pull_token" {
  description = "PAT for the cluster-puller user, used by the Forgejo registry integrity probe."
  type        = string
  sensitive   = true
}
resource "kubernetes_namespace" "monitoring" {
metadata {
@ -426,6 +431,203 @@ resource "kubernetes_cron_job_v1" "registry_integrity_probe" {
}
}
# -----------------------------------------------------------------------------
# Forgejo registry integrity probe: same algorithm as registry-integrity-probe
# above, but targets the Forgejo OCI registry instead of registry-private. Runs
# in parallel with the existing probe during the dual-push bake; once Phase 4
# decommissions registry-private, the registry-integrity-probe CronJob is
# deleted and only this one remains.
#
# Auth: HTTP Basic with cluster-puller PAT (read:package scope is enough to
# walk catalog + manifests). Reaches Forgejo via the in-cluster service so we
# don't hairpin out through Traefik for every probe run.
# -----------------------------------------------------------------------------
# Opaque Secret in the monitoring namespace holding the Basic-auth pair the
# Forgejo integrity probe reads as REG_USER / REG_PASS.
resource "kubernetes_secret" "forgejo_probe_credentials" {
  type = "Opaque"

  metadata {
    namespace = kubernetes_namespace.monitoring.metadata[0].name
    name      = "forgejo-probe-credentials"
  }

  data = {
    REG_PASS = var.forgejo_pull_token
    REG_USER = "cluster-puller"
  }
}
# Forgejo registry integrity probe CronJob (every 15 minutes).
#
# The embedded script lists /v2/_catalog, takes the last TAGS_PER_REPO tags of
# each repository, fetches every tag's manifest and, for index / manifest-list
# media types, HEADs each child digest. Results are pushed to the Pushgateway
# as registry_manifest_integrity_* gauges labelled with REGISTRY_INSTANCE.
#
# Exit status of the script: 1 when the catalog cannot be fetched or any
# manifest check fails, 0 otherwise. A reachable but EMPTY catalog counts as
# success with zero repos checked.
#
# Heredoc layout matters: the <<-EOT body is de-indented by its smallest
# common indent, so the inner shell heredoc bodies and their METRICS
# terminators must stay at that base indent. No blank lines inside the
# heredoc, for the same reason.
resource "kubernetes_cron_job_v1" "forgejo_integrity_probe" {
  metadata {
    name      = "forgejo-integrity-probe"
    namespace = kubernetes_namespace.monitoring.metadata[0].name
  }
  spec {
    concurrency_policy            = "Forbid"
    failed_jobs_history_limit     = 3
    successful_jobs_history_limit = 3
    schedule                      = "*/15 * * * *"
    job_template {
      metadata {}
      spec {
        backoff_limit              = 1
        ttl_seconds_after_finished = 600
        template {
          metadata {}
          spec {
            container {
              name  = "forgejo-integrity-probe"
              image = "docker.io/library/alpine:3.20"
              env {
                name = "REG_USER"
                value_from {
                  secret_key_ref {
                    name = kubernetes_secret.forgejo_probe_credentials.metadata[0].name
                    key  = "REG_USER"
                  }
                }
              }
              env {
                name = "REG_PASS"
                value_from {
                  secret_key_ref {
                    name = kubernetes_secret.forgejo_probe_credentials.metadata[0].name
                    key  = "REG_PASS"
                  }
                }
              }
              env {
                name  = "REGISTRY_HOST"
                value = "forgejo.forgejo.svc.cluster.local"
              }
              env {
                name  = "REGISTRY_SCHEME"
                value = "http"
              }
              env {
                name  = "REGISTRY_INSTANCE"
                value = "forgejo.viktorbarzin.me"
              }
              env {
                name  = "PUSHGATEWAY"
                value = "http://prometheus-prometheus-pushgateway.monitoring:9091/metrics/job/forgejo-integrity-probe"
              }
              env {
                name  = "TAGS_PER_REPO"
                value = "5"
              }
              command = ["/bin/sh", "-c", <<-EOT
                set -eu
                apk add --no-cache curl jq >/dev/null
                REG="$REGISTRY_HOST"
                SCHEME="$${REGISTRY_SCHEME:-https}"
                INSTANCE="$REGISTRY_INSTANCE"
                AUTH="$REG_USER:$REG_PASS"
                ACCEPT='application/vnd.oci.image.index.v1+json,application/vnd.oci.image.manifest.v1+json,application/vnd.docker.distribution.manifest.list.v2+json,application/vnd.docker.distribution.manifest.v2+json'
                # Best-effort push of metrics read from stdin; never fails the probe.
                push() {
                  curl -sf --max-time 10 --data-binary @- "$PUSHGATEWAY" >/dev/null 2>&1 || true
                }
                # Fetch the catalog and keep the HTTP status so "registry down / auth
                # failure" can be told apart from "reachable but no images yet".
                CAT_HTTP=$(curl -sk -u "$AUTH" --max-time 30 -o /tmp/catalog.json -w '%%{http_code}' \
                  "$SCHEME://$REG/v2/_catalog?n=1000") || true
                if [ "$CAT_HTTP" != "200" ] || ! jq -e 'type == "object"' /tmp/catalog.json >/dev/null 2>&1; then
                  echo "ERROR: catalog request failed (HTTP $CAT_HTTP) — registry down or auth failure, cannot probe"
                  NOW=$(date +%s)
                  push <<METRICS
                # TYPE registry_manifest_integrity_catalog_accessible gauge
                registry_manifest_integrity_catalog_accessible{instance="$INSTANCE"} 0
                # TYPE registry_manifest_integrity_last_run_timestamp gauge
                registry_manifest_integrity_last_run_timestamp{instance="$INSTANCE"} $NOW
                METRICS
                  exit 1
                fi
                # An empty repository list is valid: the loops below simply check nothing.
                jq -r '.repositories[]?' /tmp/catalog.json > /tmp/repos.txt 2>/dev/null || true
                if [ ! -s /tmp/repos.txt ]; then
                  echo "Catalog reachable but empty — nothing to probe"
                fi
                FAIL=0
                REPOS_N=0
                TAGS_N=0
                INDEXES_N=0
                while IFS= read -r repo; do
                  [ -z "$repo" ] && continue
                  REPOS_N=$((REPOS_N + 1))
                  TAGS_JSON=$(curl -sk -u "$AUTH" --max-time 15 "$SCHEME://$REG/v2/$repo/tags/list" || echo "")
                  echo "$TAGS_JSON" | jq -r '.tags[]?' 2>/dev/null | tail -n "$TAGS_PER_REPO" > /tmp/tags.txt || true
                  while IFS= read -r tag; do
                    [ -z "$tag" ] && continue
                    TAGS_N=$((TAGS_N + 1))
                    # A transport error leaves HTTP=000 and is counted as a failure
                    # instead of aborting the run before metrics are pushed.
                    HTTP=$(curl -sk -u "$AUTH" -o /tmp/m.json -w '%%{http_code}' \
                      -H "Accept: $ACCEPT" --max-time 15 \
                      "$SCHEME://$REG/v2/$repo/manifests/$tag") || true
                    if [ "$HTTP" != "200" ]; then
                      echo "FAIL: $repo:$tag manifest HTTP $HTTP"
                      FAIL=$((FAIL + 1))
                      continue
                    fi
                    MT=$(jq -r '.mediaType // empty' /tmp/m.json 2>/dev/null || echo "")
                    if echo "$MT" | grep -Eq 'manifest\.list|image\.index'; then
                      INDEXES_N=$((INDEXES_N + 1))
                      jq -r '.manifests[].digest' /tmp/m.json > /tmp/children.txt 2>/dev/null || true
                      while IFS= read -r d; do
                        [ -z "$d" ] && continue
                        CH=$(curl -sk -u "$AUTH" -o /dev/null -w '%%{http_code}' \
                          -H "Accept: $ACCEPT" --max-time 10 -I \
                          "$SCHEME://$REG/v2/$repo/manifests/$d") || true
                        if [ "$CH" != "200" ]; then
                          echo "FAIL: $repo:$tag index child $d HTTP $CH"
                          FAIL=$((FAIL + 1))
                        fi
                      done < /tmp/children.txt
                    fi
                  done < /tmp/tags.txt
                done < /tmp/repos.txt
                NOW=$(date +%s)
                push <<METRICS
                # TYPE registry_manifest_integrity_failures gauge
                registry_manifest_integrity_failures{instance="$INSTANCE"} $FAIL
                # TYPE registry_manifest_integrity_catalog_accessible gauge
                registry_manifest_integrity_catalog_accessible{instance="$INSTANCE"} 1
                # TYPE registry_manifest_integrity_repos_checked gauge
                registry_manifest_integrity_repos_checked{instance="$INSTANCE"} $REPOS_N
                # TYPE registry_manifest_integrity_tags_checked gauge
                registry_manifest_integrity_tags_checked{instance="$INSTANCE"} $TAGS_N
                # TYPE registry_manifest_integrity_indexes_checked gauge
                registry_manifest_integrity_indexes_checked{instance="$INSTANCE"} $INDEXES_N
                # TYPE registry_manifest_integrity_last_run_timestamp gauge
                registry_manifest_integrity_last_run_timestamp{instance="$INSTANCE"} $NOW
                METRICS
                echo "Probe complete: $FAIL failures across $REPOS_N repos / $TAGS_N tags / $INDEXES_N indexes"
                if [ "$FAIL" -gt 0 ]; then exit 1; fi
                EOT
              ]
              resources {
                requests = {
                  cpu    = "10m"
                  memory = "48Mi"
                }
                limits = {
                  memory = "96Mi"
                }
              }
            }
            restart_policy = "OnFailure"
          }
        }
      }
    }
  }
  lifecycle {
    # KYVERNO_LIFECYCLE_V1: Kyverno admission webhook mutates dns_config with ndots=2
    ignore_changes = [spec[0].job_template[0].spec[0].template[0].spec[0].dns_config]
  }
}
# Expose Pushgateway via NodePort so the PVE host can push LVM snapshot metrics
resource "kubernetes_service" "pushgateway_nodeport" {
metadata {

View file

@ -1656,22 +1656,22 @@ serverFiles:
labels:
severity: critical
annotations:
summary: "Registry has {{ $value }} broken manifest reference(s) — orphan index or missing blob"
description: "The registry-integrity-probe CronJob in the monitoring namespace found {{ $value }} manifest/blob references that return non-200 on the private registry. Almost certainly an orphan OCI-index child from the cleanup-tags.sh+GC race. Rebuild the affected image per docs/runbooks/registry-rebuild-image.md and investigate which tag(s) the probe logs flagged."
summary: "{{ $labels.instance }}: {{ $value }} broken manifest reference(s) — orphan index or missing blob"
description: "The integrity probe CronJob found {{ $value }} manifest/blob references that return non-200 on {{ $labels.instance }}. For registry.viktorbarzin.me see docs/runbooks/registry-rebuild-image.md (orphan OCI-index child from cleanup-tags.sh+GC race). For forgejo.viktorbarzin.me see docs/runbooks/forgejo-registry-rebuild-image.md."
- alert: RegistryIntegrityProbeStale
expr: time() - registry_manifest_integrity_last_run_timestamp > 3600
for: 15m
labels:
severity: warning
annotations:
summary: "Registry integrity probe has not reported in >1h — CronJob may be broken"
summary: "{{ $labels.instance }} integrity probe has not reported in >1h — CronJob may be broken"
- alert: RegistryCatalogInaccessible
expr: registry_manifest_integrity_catalog_accessible == 0
for: 15m
labels:
severity: critical
annotations:
summary: "Registry probe cannot fetch /v2/_catalog — auth failure or registry down"
summary: "{{ $labels.instance }} probe cannot fetch /v2/_catalog — auth failure or registry down"
- alert: NodeHighCPUUsage
expr: pve_cpu_usage_ratio * 100 > 60
for: 6h