[forgejo] Phase 4 final decommission: drop registry-private container + port 5050

Image migration completed (forgejo-migrate-orphan-images.sh ran, and
all in-scope images are now under forgejo.viktorbarzin.me/viktor/) and
the cluster cutover landed in commit 3148d15d. registry-private is
no longer needed.

* infra/modules/docker-registry/docker-compose.yml — registry-private
  service block removed; nginx 5050 port mapping dropped.
* infra/modules/docker-registry/nginx_registry.conf — upstream
  private block + port 5050 server block removed.
* infra/.woodpecker/build-ci-image.yml — drop the dual-push to
  registry.viktorbarzin.me:5050; only push to Forgejo. Verify-
  integrity step removed (the every-15min forgejo-integrity-probe
  in monitoring covers it). Break-glass tarball step still runs but
  pulls from Forgejo (the only registry left).

The registry-config-sync.yml pipeline will pick this commit up and
sync the new compose+nginx to the VM. Manual final step on the VM:
  ssh root@10.0.20.10 'cd /opt/registry && docker compose up -d --remove-orphans'
to actually destroy the registry-private container — compose does
NOT do orphan removal on a normal up -d.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Viktor Barzin 2026-05-07 19:08:17 +00:00
parent 59885c21d0
commit 8c73a0243a
3 changed files with 31 additions and 168 deletions

View file

@ -14,12 +14,9 @@ steps:
- name: build-and-push
image: woodpeckerci/plugin-docker-buildx
settings:
# Dual-push during the Forgejo registry consolidation bake. infra-ci
# is the most safety-critical image — every infra pipeline pulls it,
# including the one that fixes Forgejo when it breaks. Tarball
# break-glass below covers the chicken-and-egg.
# Phase 4 of forgejo-registry-consolidation 2026-05-07 —
# registry.viktorbarzin.me dropped, Forgejo is the only target.
repo:
- registry.viktorbarzin.me:5050/infra-ci
- forgejo.viktorbarzin.me/viktor/infra-ci
dockerfile: ci/Dockerfile
context: ci/
@ -28,97 +25,22 @@ steps:
- "${CI_COMMIT_SHA:0:8}"
platforms: linux/amd64
logins:
- registry: registry.viktorbarzin.me:5050
username:
from_secret: registry_user
password:
from_secret: registry_password
- registry: forgejo.viktorbarzin.me
username:
from_secret: forgejo_user
password:
from_secret: forgejo_push_token
# Post-push integrity check. Re-resolves the image we just pushed and HEADs
# every blob it references — top-level manifest (index or single), each child
# platform manifest, each config blob, each layer blob. If any returns !=200
# the pipeline fails loudly here so we never ship a broken index downstream.
# Historical context: 2026-04-13 and 2026-04-19 incidents both shipped indexes
# whose platform/attestation children had been GC-orphaned on the registry VM.
- name: verify-integrity
image: alpine:3.20
environment:
REG_USER:
from_secret: registry_user
REG_PASS:
from_secret: registry_password
commands:
- apk add --no-cache curl jq
- REG=registry.viktorbarzin.me:5050
- REPO=infra-ci
- SHA=${CI_COMMIT_SHA:0:8}
- AUTH="$REG_USER:$REG_PASS"
- |
set -euo pipefail
ACCEPT='Accept: application/vnd.oci.image.index.v1+json,application/vnd.oci.image.manifest.v1+json,application/vnd.docker.distribution.manifest.list.v2+json,application/vnd.docker.distribution.manifest.v2+json'
fetch_manifest() {
# Prints the body to $2, returns the HTTP code as stdout.
curl -sk -u "$AUTH" -H "$ACCEPT" \
-o "$2" -w '%{http_code}' \
"https://$REG/v2/$REPO/manifests/$1"
}
head_blob() {
curl -sk -u "$AUTH" -o /dev/null -w '%{http_code}' \
-I "https://$REG/v2/$REPO/blobs/$1"
}
verify_single_manifest() {
local ref="$1" tmp=/tmp/m-$$.json
local rc cfg
rc=$(fetch_manifest "$ref" "$tmp")
if [ "$rc" != "200" ]; then
echo "FAIL: manifest $ref returned HTTP $rc"; return 1
fi
cfg=$(jq -r '.config.digest // empty' "$tmp")
if [ -n "$cfg" ]; then
rc=$(head_blob "$cfg")
[ "$rc" = "200" ] || { echo "FAIL: config blob $cfg returned HTTP $rc"; return 1; }
fi
jq -r '.layers[]?.digest' "$tmp" > /tmp/layers-$$.txt
while IFS= read -r layer; do
[ -z "$layer" ] && continue
rc=$(head_blob "$layer")
[ "$rc" = "200" ] || { echo "FAIL: layer blob $layer returned HTTP $rc"; return 1; }
done < /tmp/layers-$$.txt
return 0
}
echo "=== Verifying push integrity for $REPO:$SHA ==="
TOP=/tmp/top-$$.json
rc=$(fetch_manifest "$SHA" "$TOP")
[ "$rc" = "200" ] || { echo "FAIL: top manifest :$SHA returned HTTP $rc"; exit 1; }
MT=$(jq -r '.mediaType // empty' "$TOP")
echo "Top-level media type: ${MT:-<unset>}"
if echo "$MT" | grep -Eq 'manifest\.list|image\.index'; then
jq -r '.manifests[].digest' "$TOP" > /tmp/children-$$.txt
echo "Multi-platform index: $(wc -l </tmp/children-$$.txt) child manifest(s)"
while IFS= read -r d; do
echo "--- child $d ---"
verify_single_manifest "$d" || exit 1
done < /tmp/children-$$.txt
else
echo "Single-platform manifest — verifying directly"
verify_single_manifest "$SHA" || exit 1
fi
echo "=== All manifests + blobs verified. Push integrity intact. ==="
# Post-push integrity check is now redundant with the every-15min
# forgejo-integrity-probe in stacks/monitoring/, which walks
# /v2/_catalog + HEADs every blob across the entire Forgejo registry.
# If a corruption pattern emerges that the periodic probe misses,
# restore a verify step similar to the pre-Phase-4 version (see
# commit 49f4956f) but pointed at forgejo.viktorbarzin.me.
# Break-glass tarball: save the just-pushed infra-ci image to disk on the
# registry VM (10.0.20.10) so we can `docker load` it back into a node
# when Forgejo is unreachable AND registry-private is gone (post-Phase 4).
# when Forgejo is unreachable. The tarball is pulled from Forgejo at build
# time, while it is up — the saved file is what survives a later outage.
# Best-effort — failure here doesn't fail the pipeline.
# Recovery procedure: docs/runbooks/forgejo-registry-breakglass.md.
- name: breakglass-tarball
@ -127,6 +49,10 @@ steps:
environment:
REGISTRY_SSH_KEY:
from_secret: registry_ssh_key
FORGEJO_USER:
from_secret: forgejo_user
FORGEJO_PASS:
from_secret: forgejo_push_token
commands:
- apk add --no-cache openssh-client
- mkdir -p ~/.ssh && chmod 700 ~/.ssh
@ -138,13 +64,11 @@ steps:
ssh -n -o BatchMode=yes root@10.0.20.10 "
set -e
mkdir -p /opt/registry/data/private/_breakglass
IMAGE=registry.viktorbarzin.me:5050/infra-ci:$SHA
# Pull from the local registry-private — fast hop on the VM itself.
IMAGE=forgejo.viktorbarzin.me/viktor/infra-ci:$SHA
            echo $FORGEJO_PASS | docker login forgejo.viktorbarzin.me -u $FORGEJO_USER --password-stdin
docker pull \$IMAGE
docker save \$IMAGE | gzip > /opt/registry/data/private/_breakglass/infra-ci-$SHA.tar.gz
ln -sfn infra-ci-$SHA.tar.gz /opt/registry/data/private/_breakglass/infra-ci-latest.tar.gz
# Retain last 5 by mtime; older versions are still recoverable from
# registry blobs until a corruption event.
ls -t /opt/registry/data/private/_breakglass/infra-ci-*.tar.gz \
| grep -v 'latest' | tail -n +6 | xargs -r rm -v
ls -lh /opt/registry/data/private/_breakglass/

View file

@ -89,45 +89,26 @@ services:
retries: 3
start_period: 10s
# registry-private is being kept TEMPORARILY during Phase 3+4 of
# forgejo-registry-consolidation 2026-05-07 — needed for the orphan-image
# migration script to pull images one last time before flipping the
# cluster off it. To remove: drop this entire block, drop the 5050:5050
# port mapping in nginx, and run on the live VM:
# ssh root@10.0.20.10 'cd /opt/registry && docker compose up -d --remove-orphans'
# …and after 1 week of no incidents, `rm -rf /opt/registry/data/private/`.
registry-private:
image: registry:2.8.3
container_name: registry-private
restart: always
volumes:
- /opt/registry/data/private:/var/lib/registry
- /opt/registry/config-private.yml:/etc/docker/registry/config.yml:ro
- /opt/registry/htpasswd:/auth/htpasswd:ro
networks:
- registry
healthcheck:
# 401 is expected (auth required) — any HTTP response means the registry is healthy
test: ["CMD", "sh", "-c", "wget -qS -O /dev/null http://127.0.0.1:5000/v2/ 2>&1 | grep -q 'HTTP/'"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
# registry-private decommissioned in Phase 4 of
# forgejo-registry-consolidation 2026-05-07 — image migration completed,
# cluster flipped to forgejo.viktorbarzin.me/viktor/<image>. The remaining
# five services on this VM are pull-through caches for upstream registries.
  # After 1 week of no incidents, `rm -rf /opt/registry/data/private/docker/`
  # on the VM frees ~2.6 GB of registry blobs (registry:2 keeps its data under
  # docker/registry/v2 inside the volume). The tarball break-glass under
  # /opt/registry/data/private/_breakglass/ stays — it's how we recover
  # infra-ci if Forgejo ever goes fully down, so do NOT rm the whole
  # data/private/ directory.
nginx:
image: nginx:alpine
container_name: registry-nginx
restart: always
# 5050 will be dropped after the migration script finishes copying images
# off registry-private — see Phase 4 of forgejo-registry-consolidation
# 2026-05-07.
    # 5050 dropped in Phase 4 of forgejo-registry-consolidation 2026-05-07.
ports:
- "5000:5000"
- "5010:5010"
- "5020:5020"
- "5030:5030"
- "5040:5040"
- "5050:5050"
volumes:
- /opt/registry/nginx.conf:/etc/nginx/nginx.conf:ro
- /opt/registry/tls:/etc/nginx/tls:ro
@ -145,8 +126,6 @@ services:
condition: service_healthy
registry-kyverno:
condition: service_healthy
registry-private:
condition: service_healthy
healthcheck:
test: ["CMD", "sh", "-c", "wget -qO- http://127.0.0.1:5000/v2/ >/dev/null 2>&1"]
interval: 30s

View file

@ -33,16 +33,9 @@ http {
keepalive 32;
}
# `upstream private` is being kept TEMPORARILY during Phase 3+4 of
# forgejo-registry-consolidation 2026-05-07 — registry-private is the
# source of every image we still need to migrate to Forgejo. Drop this
# block and the port-5050 server below in the SAME commit that runs the
# final `docker compose up -d --remove-orphans` to stop the
# registry-private container.
upstream private {
server registry-private:5000;
keepalive 32;
}
# `upstream private` removed in Phase 4 of forgejo-registry-consolidation
# 2026-05-07. The /v2/ private registry is now Forgejo at
# forgejo.viktorbarzin.me/viktor/.
# --- Docker Hub (port 5000) ---
@ -174,41 +167,8 @@ http {
}
}
# --- Private R/W Registry (port 5050, TLS) ---
# KEPT TEMPORARILY during Phase 3+4 of forgejo-registry-consolidation
# 2026-05-07 to allow the orphan-image migration script to pull images
# off this registry one last time. To remove: drop this server block,
# the upstream `private` block above, and the 5050:5050 port mapping
# in docker-compose.yml — all in the same commit.
server {
listen 5050 ssl;
server_name registry.viktorbarzin.me;
ssl_certificate /etc/nginx/tls/fullchain.pem;
ssl_certificate_key /etc/nginx/tls/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
client_max_body_size 0;
proxy_request_buffering off;
proxy_buffering off;
chunked_transfer_encoding on;
location /v2/ {
proxy_pass http://private;
proxy_http_version 1.1;
proxy_set_header Host $http_host;
proxy_set_header Connection "";
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 900;
proxy_send_timeout 900;
}
location / {
return 200 'ok';
add_header Content-Type text/plain;
}
}
# --- Private R/W Registry (port 5050) decommissioned Phase 4 2026-05-07 ---
# The TLS port 5050 server block previously fronted `registry-private`.
# Migrated to Forgejo at forgejo.viktorbarzin.me/viktor/. Both
# docker-compose.yml and this nginx config no longer reference port 5050.
}