Adds Forgejo as a second push target on the build-ci-image pipeline and saves the just-pushed image as a gzipped tarball on the registry VM disk (/opt/registry/data/private/_breakglass/) so we can recover infra-ci with `ctr images import` if both registries are down. * Dual-push: registry.viktorbarzin.me:5050/infra-ci AND forgejo.viktorbarzin.me/viktor/infra-ci, in the same woodpeckerci/plugin-docker-buildx step. Same image bytes; the Forgejo integrity probe (every 15min) catches any divergence. * Break-glass step: SSHes to 10.0.20.10, docker pulls + saves + gzips, keeps last 5 tarballs (latest symlink). Failure-tolerant so a transient registry blip doesn't fail the build pipeline. * Runbook docs/runbooks/forgejo-registry-breakglass.md documents the recovery flow (when to use, scp+ctr import, node cordon, underlying-issue fix). Tarball mirrors to Synology automatically through the existing daily offsite-sync-backup job — no new sync wiring needed. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
164 lines
6.5 KiB
YAML
164 lines
6.5 KiB
YAML
# Builds the CI tools Docker image that the infra pipelines run in.
# Two triggers are accepted:
#   * a push to master that touches ci/Dockerfile, and
#   * a manual run (API/UI), so a rebuild after a registry incident does not
#     need a cosmetic Dockerfile edit.

when:
- event: push
  branch: master
  path:
    include:
    - 'ci/Dockerfile'
- event: manual
|
|
|
|
steps:
|
|
# Builds ci/Dockerfile once and pushes the result to both registries listed
# under `repo`, tagged `latest` and with the first 8 characters of the commit
# SHA.
- name: build-and-push
  image: woodpeckerci/plugin-docker-buildx
  settings:
    # What to build.
    dockerfile: ci/Dockerfile
    context: ci/
    platforms: linux/amd64
    # Where to push. Both targets are pushed during the Forgejo registry
    # consolidation bake: infra-ci is the most safety-critical image, since
    # every infra pipeline pulls it, including the one that repairs Forgejo
    # when it breaks. The break-glass tarball step later in this pipeline
    # covers that chicken-and-egg case.
    repo:
    - registry.viktorbarzin.me:5050/infra-ci
    - forgejo.viktorbarzin.me/viktor/infra-ci
    tags:
    - latest
    - "${CI_COMMIT_SHA:0:8}"
    # One credential pair per push target, each read from pipeline secrets.
    logins:
    - registry: registry.viktorbarzin.me:5050
      username:
        from_secret: registry_user
      password:
        from_secret: registry_password
    - registry: forgejo.viktorbarzin.me
      username:
        from_secret: forgejo_user
      password:
        from_secret: forgejo_push_token
|
|
|
|
# Post-push integrity check. Re-resolves the image we just pushed to
# registry.viktorbarzin.me:5050 and HEADs every blob it references: the
# top-level manifest (index or single), each child platform manifest, each
# config blob and each layer blob. If any request returns something other than
# HTTP 200 the pipeline fails loudly here, so a broken index is never shipped
# downstream.
# Historical context: 2026-04-13 and 2026-04-19 incidents both shipped indexes
# whose platform/attestation children had been GC-orphaned on the registry VM.
#
# NOTE: Woodpecker pre-processes every braced ${...} expression in this file
# before the shell sees it. Only ${CI_COMMIT_SHA:0:8} is meant to be expanded
# that way; shell variables below are deliberately written unbraced ($VAR).
- name: verify-integrity
  image: alpine:3.20
  environment:
    REG_USER:
      from_secret: registry_user
    REG_PASS:
      from_secret: registry_password
  commands:
  - apk add --no-cache curl jq
  - REG=registry.viktorbarzin.me:5050
  - REPO=infra-ci
  - SHA=${CI_COMMIT_SHA:0:8}
  - AUTH="$REG_USER:$REG_PASS"
  - |
    set -euo pipefail
    ACCEPT='Accept: application/vnd.oci.image.index.v1+json,application/vnd.oci.image.manifest.v1+json,application/vnd.docker.distribution.manifest.list.v2+json,application/vnd.docker.distribution.manifest.v2+json'

    # fetch_manifest REF OUTFILE
    # Writes the manifest body for REF (tag or digest) to OUTFILE and prints
    # the HTTP status code on stdout.
    fetch_manifest() {
      curl -sk -u "$AUTH" -H "$ACCEPT" \
        -o "$2" -w '%{http_code}' \
        "https://$REG/v2/$REPO/manifests/$1"
    }

    # head_blob DIGEST
    # Issues a HEAD for the blob and prints the HTTP status code on stdout.
    head_blob() {
      curl -sk -u "$AUTH" -o /dev/null -w '%{http_code}' \
        -I "https://$REG/v2/$REPO/blobs/$1"
    }

    # verify_single_manifest REF
    # Fetches one image manifest and checks that its config blob (if any) and
    # every layer blob answer HTTP 200. Returns 1 on the first failure.
    verify_single_manifest() {
      local ref="$1" tmp=/tmp/m-$$.json
      local rc cfg
      rc=$(fetch_manifest "$ref" "$tmp")
      if [ "$rc" != "200" ]; then
        echo "FAIL: manifest $ref returned HTTP $rc"; return 1
      fi
      cfg=$(jq -r '.config.digest // empty' "$tmp")
      if [ -n "$cfg" ]; then
        rc=$(head_blob "$cfg")
        [ "$rc" = "200" ] || { echo "FAIL: config blob $cfg returned HTTP $rc"; return 1; }
      fi
      jq -r '.layers[]?.digest' "$tmp" > /tmp/layers-$$.txt
      while IFS= read -r layer; do
        [ -z "$layer" ] && continue
        rc=$(head_blob "$layer")
        [ "$rc" = "200" ] || { echo "FAIL: layer blob $layer returned HTTP $rc"; return 1; }
      done < /tmp/layers-$$.txt
      return 0
    }

    echo "=== Verifying push integrity for $REPO:$SHA ==="
    TOP=/tmp/top-$$.json
    rc=$(fetch_manifest "$SHA" "$TOP")
    [ "$rc" = "200" ] || { echo "FAIL: top manifest :$SHA returned HTTP $rc"; exit 1; }

    MT=$(jq -r '.mediaType // empty' "$TOP")
    # Plain if/else instead of a braced default expansion: the braced form
    # would be resolved by the pipeline pre-processor (where MT is unknown)
    # and the log would always show <unset>.
    if [ -n "$MT" ]; then
      echo "Top-level media type: $MT"
    else
      echo "Top-level media type: <unset>"
    fi

    # Treat the document as an index when the media type says so OR when it
    # carries a top-level "manifests" array. Without the second test an index
    # served without a mediaType field would take the single-manifest branch,
    # where it has no config and no layers and would pass without checking
    # anything.
    if echo "$MT" | grep -Eq 'manifest\.list|image\.index' \
       || jq -e '(.manifests | type) == "array"' "$TOP" >/dev/null; then
      jq -r '.manifests[].digest' "$TOP" > /tmp/children-$$.txt
      echo "Multi-platform index: $(wc -l </tmp/children-$$.txt) child manifest(s)"
      while IFS= read -r d; do
        echo "--- child $d ---"
        verify_single_manifest "$d" || exit 1
      done < /tmp/children-$$.txt
    else
      echo "Single-platform manifest — verifying directly"
      verify_single_manifest "$SHA" || exit 1
    fi

    echo "=== All manifests + blobs verified. Push integrity intact. ==="
|
|
|
|
# Break-glass tarball: save the just-pushed infra-ci image to disk on the
# registry VM (10.0.20.10) so it can be loaded back onto a node
# (`ctr images import` / `docker load`) when Forgejo is unreachable AND
# registry-private is gone (post-Phase 4).
# Best-effort: `failure: ignore` means a failure here does not fail the
# pipeline.
# Recovery procedure: docs/runbooks/forgejo-registry-breakglass.md.
#
# The image is saved with `docker save -o` to a temporary file, compressed,
# and only then renamed into place. A `docker save | gzip` pipeline would hide
# a failing `docker save` (the pipeline status is gzip's), leaving a
# valid-looking but truncated tarball with infra-ci-latest pointing at it.
# Trade-off: the uncompressed tar briefly occupies disk on the VM.
- name: breakglass-tarball
  image: alpine:3.20
  failure: ignore
  environment:
    REGISTRY_SSH_KEY:
      from_secret: registry_ssh_key
  commands:
  - apk add --no-cache openssh-client
  - mkdir -p ~/.ssh && chmod 700 ~/.ssh
  - printf '%s\n' "$REGISTRY_SSH_KEY" > ~/.ssh/id_ed25519
  - chmod 600 ~/.ssh/id_ed25519
  - ssh-keyscan -t ed25519 10.0.20.10 >> ~/.ssh/known_hosts 2>/dev/null
  - SHA=${CI_COMMIT_SHA:0:8}
  - |
    # Inside the quoted remote script, $SHA is expanded here on the CI side,
    # while \$NAME is passed through and expanded on the registry VM.
    ssh -n -o BatchMode=yes root@10.0.20.10 "
      set -e
      DIR=/opt/registry/data/private/_breakglass
      IMAGE=registry.viktorbarzin.me:5050/infra-ci:$SHA
      OUT=\$DIR/infra-ci-$SHA.tar.gz
      mkdir -p \$DIR
      # Remove temp files on any exit so a failed run leaves nothing behind.
      trap 'rm -f \$OUT.tmp.tar \$OUT.tmp' EXIT
      # Pull from the local registry-private: fast hop on the VM itself.
      docker pull \$IMAGE
      # Each stage is its own command so set -e stops on the first failure.
      docker save -o \$OUT.tmp.tar \$IMAGE
      gzip -c \$OUT.tmp.tar > \$OUT.tmp
      rm -f \$OUT.tmp.tar
      mv -f \$OUT.tmp \$OUT
      # Repoint latest only after the new tarball is complete.
      ln -sfn infra-ci-$SHA.tar.gz \$DIR/infra-ci-latest.tar.gz
      # Retain last 5 by mtime; older versions are still recoverable from
      # registry blobs until a corruption event.
      ls -t \$DIR/infra-ci-*.tar.gz | grep -v 'latest' | tail -n +6 | xargs -r rm -v
      ls -lh \$DIR/
    "
|
|
|
|
# Posts a "CI image built" message to the Slack webhook. Runs only when the
# pipeline status is success; a failed POST is swallowed (`|| true`) so the
# notification itself cannot fail the pipeline.
- name: slack
  image: curlimages/curl
  when:
    status: [success]
  environment:
    SLACK_WEBHOOK:
      from_secret: slack_webhook
  commands:
  - |
    curl -s -X POST -H 'Content-type: application/json' \
      --data "{\"text\":\"CI image built: forgejo.viktorbarzin.me/viktor/infra-ci:${CI_COMMIT_SHA:0:8} (and registry-private mirror)\"}" \
      "$SLACK_WEBHOOK" || true
|