[forgejo] Phase 4 final decommission: drop registry-private container + port 5050
Image migration completed (forgejo-migrate-orphan-images.sh ran + all in-scope images now under forgejo.viktorbarzin.me/viktor/) and the cluster cutover landed in commit 3148d15d. registry-private is no longer needed. * infra/modules/docker-registry/docker-compose.yml — registry-private service block removed; nginx 5050 port mapping dropped. * infra/modules/docker-registry/nginx_registry.conf — upstream private block + port 5050 server block removed. * infra/.woodpecker/build-ci-image.yml — drop the dual-push to registry.viktorbarzin.me:5050; only push to Forgejo. Verify-integrity step removed (the every-15min forgejo-integrity-probe in monitoring covers it). Break-glass tarball step still runs but pulls from Forgejo (the only registry left). The registry-config-sync.yml pipeline will pick this commit up and sync the new compose+nginx to the VM. Manual final step on the VM: ssh root@10.0.20.10 'cd /opt/registry && docker compose up -d --remove-orphans' to actually destroy the registry-private container — compose does NOT do orphan removal on a normal up -d. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
59885c21d0
commit
8c73a0243a
3 changed files with 31 additions and 168 deletions
|
|
@ -14,12 +14,9 @@ steps:
|
|||
- name: build-and-push
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
# Dual-push during the Forgejo registry consolidation bake. infra-ci
|
||||
# is the most safety-critical image — every infra pipeline pulls it,
|
||||
# including the one that fixes Forgejo when it breaks. Tarball
|
||||
# break-glass below covers the chicken-and-egg.
|
||||
# Phase 4 of forgejo-registry-consolidation 2026-05-07 —
|
||||
# registry.viktorbarzin.me dropped, Forgejo is the only target.
|
||||
repo:
|
||||
- registry.viktorbarzin.me:5050/infra-ci
|
||||
- forgejo.viktorbarzin.me/viktor/infra-ci
|
||||
dockerfile: ci/Dockerfile
|
||||
context: ci/
|
||||
|
|
@ -28,97 +25,22 @@ steps:
|
|||
- "${CI_COMMIT_SHA:0:8}"
|
||||
platforms: linux/amd64
|
||||
logins:
|
||||
- registry: registry.viktorbarzin.me:5050
|
||||
username:
|
||||
from_secret: registry_user
|
||||
password:
|
||||
from_secret: registry_password
|
||||
- registry: forgejo.viktorbarzin.me
|
||||
username:
|
||||
from_secret: forgejo_user
|
||||
password:
|
||||
from_secret: forgejo_push_token
|
||||
|
||||
# Post-push integrity check. Re-resolves the image we just pushed and HEADs
|
||||
# every blob it references — top-level manifest (index or single), each child
|
||||
# platform manifest, each config blob, each layer blob. If any returns !=200
|
||||
# the pipeline fails loudly here so we never ship a broken index downstream.
|
||||
# Historical context: 2026-04-13 and 2026-04-19 incidents both shipped indexes
|
||||
# whose platform/attestation children had been GC-orphaned on the registry VM.
|
||||
- name: verify-integrity
|
||||
image: alpine:3.20
|
||||
environment:
|
||||
REG_USER:
|
||||
from_secret: registry_user
|
||||
REG_PASS:
|
||||
from_secret: registry_password
|
||||
commands:
|
||||
- apk add --no-cache curl jq
|
||||
- REG=registry.viktorbarzin.me:5050
|
||||
- REPO=infra-ci
|
||||
- SHA=${CI_COMMIT_SHA:0:8}
|
||||
- AUTH="$REG_USER:$REG_PASS"
|
||||
- |
|
||||
set -euo pipefail
|
||||
ACCEPT='Accept: application/vnd.oci.image.index.v1+json,application/vnd.oci.image.manifest.v1+json,application/vnd.docker.distribution.manifest.list.v2+json,application/vnd.docker.distribution.manifest.v2+json'
|
||||
|
||||
fetch_manifest() {
|
||||
# Prints the body to $2, returns the HTTP code as stdout.
|
||||
curl -sk -u "$AUTH" -H "$ACCEPT" \
|
||||
-o "$2" -w '%{http_code}' \
|
||||
"https://$REG/v2/$REPO/manifests/$1"
|
||||
}
|
||||
head_blob() {
|
||||
curl -sk -u "$AUTH" -o /dev/null -w '%{http_code}' \
|
||||
-I "https://$REG/v2/$REPO/blobs/$1"
|
||||
}
|
||||
|
||||
verify_single_manifest() {
|
||||
local ref="$1" tmp=/tmp/m-$$.json
|
||||
local rc cfg
|
||||
rc=$(fetch_manifest "$ref" "$tmp")
|
||||
if [ "$rc" != "200" ]; then
|
||||
echo "FAIL: manifest $ref returned HTTP $rc"; return 1
|
||||
fi
|
||||
cfg=$(jq -r '.config.digest // empty' "$tmp")
|
||||
if [ -n "$cfg" ]; then
|
||||
rc=$(head_blob "$cfg")
|
||||
[ "$rc" = "200" ] || { echo "FAIL: config blob $cfg returned HTTP $rc"; return 1; }
|
||||
fi
|
||||
jq -r '.layers[]?.digest' "$tmp" > /tmp/layers-$$.txt
|
||||
while IFS= read -r layer; do
|
||||
[ -z "$layer" ] && continue
|
||||
rc=$(head_blob "$layer")
|
||||
[ "$rc" = "200" ] || { echo "FAIL: layer blob $layer returned HTTP $rc"; return 1; }
|
||||
done < /tmp/layers-$$.txt
|
||||
return 0
|
||||
}
|
||||
|
||||
echo "=== Verifying push integrity for $REPO:$SHA ==="
|
||||
TOP=/tmp/top-$$.json
|
||||
rc=$(fetch_manifest "$SHA" "$TOP")
|
||||
[ "$rc" = "200" ] || { echo "FAIL: top manifest :$SHA returned HTTP $rc"; exit 1; }
|
||||
|
||||
MT=$(jq -r '.mediaType // empty' "$TOP")
|
||||
echo "Top-level media type: ${MT:-<unset>}"
|
||||
|
||||
if echo "$MT" | grep -Eq 'manifest\.list|image\.index'; then
|
||||
jq -r '.manifests[].digest' "$TOP" > /tmp/children-$$.txt
|
||||
echo "Multi-platform index: $(wc -l </tmp/children-$$.txt) child manifest(s)"
|
||||
while IFS= read -r d; do
|
||||
echo "--- child $d ---"
|
||||
verify_single_manifest "$d" || exit 1
|
||||
done < /tmp/children-$$.txt
|
||||
else
|
||||
echo "Single-platform manifest — verifying directly"
|
||||
verify_single_manifest "$SHA" || exit 1
|
||||
fi
|
||||
|
||||
echo "=== All manifests + blobs verified. Push integrity intact. ==="
|
||||
# Post-push integrity check is now redundant with the every-15min
|
||||
# forgejo-integrity-probe in stacks/monitoring/, which walks
|
||||
# /v2/_catalog + HEADs every blob across the entire Forgejo registry.
|
||||
# If a corruption pattern emerges that the periodic probe misses,
|
||||
# restore a verify step similar to the pre-Phase-4 version (see
|
||||
# commit 49f4956f) but pointed at forgejo.viktorbarzin.me.
|
||||
|
||||
# Break-glass tarball: save the just-pushed infra-ci image to disk on the
|
||||
# registry VM (10.0.20.10) so we can `docker load` it back into a node
|
||||
# when Forgejo is unreachable AND registry-private is gone (post-Phase 4).
|
||||
# when Forgejo is unreachable. Pulls from Forgejo (the only registry now).
|
||||
# Best-effort — failure here doesn't fail the pipeline.
|
||||
# Recovery procedure: docs/runbooks/forgejo-registry-breakglass.md.
|
||||
- name: breakglass-tarball
|
||||
|
|
@ -127,6 +49,10 @@ steps:
|
|||
environment:
|
||||
REGISTRY_SSH_KEY:
|
||||
from_secret: registry_ssh_key
|
||||
FORGEJO_USER:
|
||||
from_secret: forgejo_user
|
||||
FORGEJO_PASS:
|
||||
from_secret: forgejo_push_token
|
||||
commands:
|
||||
- apk add --no-cache openssh-client
|
||||
- mkdir -p ~/.ssh && chmod 700 ~/.ssh
|
||||
|
|
@ -138,13 +64,11 @@ steps:
|
|||
ssh -n -o BatchMode=yes root@10.0.20.10 "
|
||||
set -e
|
||||
mkdir -p /opt/registry/data/private/_breakglass
|
||||
IMAGE=registry.viktorbarzin.me:5050/infra-ci:$SHA
|
||||
# Pull from the local registry-private — fast hop on the VM itself.
|
||||
IMAGE=forgejo.viktorbarzin.me/viktor/infra-ci:$SHA
|
||||
echo \$FORGEJO_PASS | docker login forgejo.viktorbarzin.me -u \$FORGEJO_USER --password-stdin
|
||||
docker pull \$IMAGE
|
||||
docker save \$IMAGE | gzip > /opt/registry/data/private/_breakglass/infra-ci-$SHA.tar.gz
|
||||
ln -sfn infra-ci-$SHA.tar.gz /opt/registry/data/private/_breakglass/infra-ci-latest.tar.gz
|
||||
# Retain last 5 by mtime; older versions are still recoverable from
|
||||
# registry blobs until a corruption event.
|
||||
ls -t /opt/registry/data/private/_breakglass/infra-ci-*.tar.gz \
|
||||
| grep -v 'latest' | tail -n +6 | xargs -r rm -v
|
||||
ls -lh /opt/registry/data/private/_breakglass/
|
||||
|
|
|
|||
|
|
@ -89,45 +89,26 @@ services:
|
|||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
# registry-private is being kept TEMPORARILY during Phase 3+4 of
|
||||
# forgejo-registry-consolidation 2026-05-07 — needed for the orphan-image
|
||||
# migration script to pull images one last time before flipping the
|
||||
# cluster off it. To remove: drop this entire block, drop the 5050:5050
|
||||
# port mapping in nginx, and run on the live VM:
|
||||
# ssh root@10.0.20.10 'cd /opt/registry && docker compose up -d --remove-orphans'
|
||||
# …and after 1 week of no incidents, `rm -rf /opt/registry/data/private/`.
|
||||
registry-private:
|
||||
image: registry:2.8.3
|
||||
container_name: registry-private
|
||||
restart: always
|
||||
volumes:
|
||||
- /opt/registry/data/private:/var/lib/registry
|
||||
- /opt/registry/config-private.yml:/etc/docker/registry/config.yml:ro
|
||||
- /opt/registry/htpasswd:/auth/htpasswd:ro
|
||||
networks:
|
||||
- registry
|
||||
healthcheck:
|
||||
# 401 is expected (auth required) — any HTTP response means the registry is healthy
|
||||
test: ["CMD", "sh", "-c", "wget -qS -O /dev/null http://127.0.0.1:5000/v2/ 2>&1 | grep -q 'HTTP/'"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
# registry-private decommissioned in Phase 4 of
|
||||
# forgejo-registry-consolidation 2026-05-07 — image migration completed,
|
||||
# cluster flipped to forgejo.viktorbarzin.me/viktor/<image>. The remaining
|
||||
# five services on this VM are pull-through caches for upstream registries.
|
||||
# After 1 week of no incidents, `rm -rf /opt/registry/data/private/` on the
|
||||
# VM frees ~2.6 GB. The tarball break-glass under
|
||||
# /opt/registry/data/private/_breakglass/ stays — it's how we recover
|
||||
# infra-ci if Forgejo ever goes fully down.
|
||||
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
container_name: registry-nginx
|
||||
restart: always
|
||||
# 5050 will be dropped after the migration script finishes copying images
|
||||
# off registry-private — see Phase 4 of forgejo-registry-consolidation
|
||||
# 2026-05-07.
|
||||
# 5050 dropped Phase 4 of forgejo-registry-consolidation 2026-05-07.
|
||||
ports:
|
||||
- "5000:5000"
|
||||
- "5010:5010"
|
||||
- "5020:5020"
|
||||
- "5030:5030"
|
||||
- "5040:5040"
|
||||
- "5050:5050"
|
||||
volumes:
|
||||
- /opt/registry/nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
- /opt/registry/tls:/etc/nginx/tls:ro
|
||||
|
|
@ -145,8 +126,6 @@ services:
|
|||
condition: service_healthy
|
||||
registry-kyverno:
|
||||
condition: service_healthy
|
||||
registry-private:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "sh", "-c", "wget -qO- http://127.0.0.1:5000/v2/ >/dev/null 2>&1"]
|
||||
interval: 30s
|
||||
|
|
|
|||
|
|
@ -33,16 +33,9 @@ http {
|
|||
keepalive 32;
|
||||
}
|
||||
|
||||
# `upstream private` is being kept TEMPORARILY during Phase 3+4 of
|
||||
# forgejo-registry-consolidation 2026-05-07 — registry-private is the
|
||||
# source of every image we still need to migrate to Forgejo. Drop this
|
||||
# block and the port-5050 server below in the SAME commit that runs the
|
||||
# final `docker compose up -d --remove-orphans` to stop the
|
||||
# registry-private container.
|
||||
upstream private {
|
||||
server registry-private:5000;
|
||||
keepalive 32;
|
||||
}
|
||||
# `upstream private` removed in Phase 4 of forgejo-registry-consolidation
|
||||
# 2026-05-07. The /v2/ private registry is now Forgejo at
|
||||
# forgejo.viktorbarzin.me/viktor/.
|
||||
|
||||
# --- Docker Hub (port 5000) ---
|
||||
|
||||
|
|
@ -174,41 +167,8 @@ http {
|
|||
}
|
||||
}
|
||||
|
||||
# --- Private R/W Registry (port 5050, TLS) ---
|
||||
# KEPT TEMPORARILY during Phase 3+4 of forgejo-registry-consolidation
|
||||
# 2026-05-07 to allow the orphan-image migration script to pull images
|
||||
# off this registry one last time. To remove: drop this server block,
|
||||
# the upstream `private` block above, and the 5050:5050 port mapping
|
||||
# in docker-compose.yml — all in the same commit.
|
||||
server {
|
||||
listen 5050 ssl;
|
||||
server_name registry.viktorbarzin.me;
|
||||
|
||||
ssl_certificate /etc/nginx/tls/fullchain.pem;
|
||||
ssl_certificate_key /etc/nginx/tls/privkey.pem;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
|
||||
client_max_body_size 0;
|
||||
proxy_request_buffering off;
|
||||
proxy_buffering off;
|
||||
chunked_transfer_encoding on;
|
||||
|
||||
location /v2/ {
|
||||
proxy_pass http://private;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $http_host;
|
||||
proxy_set_header Connection "";
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
proxy_read_timeout 900;
|
||||
proxy_send_timeout 900;
|
||||
}
|
||||
|
||||
location / {
|
||||
return 200 'ok';
|
||||
add_header Content-Type text/plain;
|
||||
}
|
||||
}
|
||||
# --- Private R/W Registry (port 5050) decommissioned Phase 4 2026-05-07 ---
|
||||
# The TLS port 5050 server block previously fronted `registry-private`.
|
||||
# Migrated to Forgejo at forgejo.viktorbarzin.me/viktor/. Both
|
||||
# docker-compose.yml and this nginx config no longer reference port 5050.
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue