The cleanup-tags.sh + garbage-collect cycle can delete blob data while leaving _layers/ link files intact. The registry then returns HTTP 200 with 0 bytes for those layers, causing "unexpected EOF" errors on image pulls. fix-broken-blobs.sh walks all repositories, checks each layer link against the actual blob data, and removes orphaned links so that the registry re-fetches the blobs from upstream on the next pull. Schedule: daily at 2:30 AM (after tag cleanup) and Sundays at 3:30 AM (after garbage collection). The first run found that 2,335 of 2,556 layer links (91%) were orphaned.
59 lines
2.2 KiB
Bash
59 lines
2.2 KiB
Bash
#!/usr/bin/env python3
"""Finds and removes layer links that point to non-existent blobs.

When the cleanup-tags.sh + garbage-collect cycle runs, it can delete blob data
while leaving _layers/ link files intact. The registry then returns HTTP 200
with 0 bytes for those layers (it finds the link, trusts the blob exists, but
the data is gone). This causes containerd to fail with "unexpected EOF".

This script walks all repositories, checks each layer link against the actual
blobs directory, and removes any orphaned links. On next pull, the registry
will re-fetch the missing blobs from the upstream registry.

Run after garbage-collect (e.g., 3:15 AM Sunday) or daily.

Usage:
    <script> [BASE] [--dry-run]

BASE is the directory holding one sub-directory per registry (default:
/opt/registry/data). With --dry-run, orphaned links are reported but nothing
is deleted. The flag may appear before or after BASE.
"""

import os
import shutil
import sys

DEFAULT_BASE = "/opt/registry/data"
DRY_RUN_FLAG = "--dry-run"

# Layout below each "<BASE>/<registry_name>/" directory.
_REPOS_SUBDIR = "docker/registry/v2/repositories"
_BLOBS_SUBDIR = "docker/registry/v2/blobs"
# A directory whose path ends with this holds one sub-directory per layer digest.
_LAYERS_SUFFIX = "/_layers/sha256"


def parse_args(argv):
    """Split command-line arguments into ``(base, dry_run)``.

    Args:
        argv: Argument list without the program name.

    Returns:
        A tuple ``(base, dry_run)``. ``base`` is the first argument that is not
        the ``--dry-run`` flag, or ``DEFAULT_BASE`` when there is none.
        ``dry_run`` is True when the flag appears anywhere in ``argv``.
    """
    dry_run = DRY_RUN_FLAG in argv
    # Filter the flag out first so that "--dry-run" given as the first (or only)
    # argument is never mistaken for the data directory.
    positional = [arg for arg in argv if arg != DRY_RUN_FLAG]
    base = positional[0] if positional else DEFAULT_BASE
    return base, dry_run


def remove_orphaned_links(base, dry_run=False):
    """Remove layer links under ``base`` whose blob data file is missing.

    For every ``<base>/<registry>/docker/registry/v2/repositories/.../_layers/
    sha256/<digest>/link`` file, the matching
    ``<base>/<registry>/docker/registry/v2/blobs/sha256/<digest[:2]>/<digest>/data``
    file is looked up. If it is not a regular file, a message is printed and,
    unless ``dry_run`` is set, the whole ``<digest>`` link directory is deleted.

    Args:
        base: Directory containing one sub-directory per registry.
        dry_run: When True, only report; do not delete anything.

    Returns:
        A tuple ``(checked, orphaned)``: the number of link files examined and
        the number found orphaned (removed, or in dry-run mode, reported).

    Raises:
        OSError: If ``base`` cannot be listed or a link directory cannot be
            removed.
    """
    checked = 0
    orphaned = 0
    prefix = "[DRY RUN] " if dry_run else ""

    for registry_name in sorted(os.listdir(base)):
        repos_dir = os.path.join(base, registry_name, _REPOS_SUBDIR)
        blobs_dir = os.path.join(base, registry_name, _BLOBS_SUBDIR)
        if not os.path.isdir(repos_dir):
            continue

        for root, dirs, _files in os.walk(repos_dir):
            if not root.endswith(_LAYERS_SUFFIX):
                continue

            digests = sorted(dirs)
            # Nothing below the digest directories needs visiting; pruning also
            # keeps os.walk from trying to enter directories deleted just below.
            dirs.clear()

            # Repository name = path between repos_dir and "/_layers/sha256".
            repo = os.path.relpath(root[: -len(_LAYERS_SUFFIX)], repos_dir)

            for digest in digests:
                link_dir = os.path.join(root, digest)
                if not os.path.isfile(os.path.join(link_dir, "link")):
                    continue
                checked += 1

                # Check if the actual blob data exists.
                blob_data = os.path.join(
                    blobs_dir, "sha256", digest[:2], digest, "data"
                )
                if os.path.isfile(blob_data):
                    continue

                print(
                    f"{prefix}[{registry_name}/{repo}] "
                    f"removing orphaned layer link: {digest[:12]}..."
                )
                if not dry_run:
                    # Remove the entire digest directory (contains the link file).
                    shutil.rmtree(link_dir)
                orphaned += 1

    return checked, orphaned


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 if the base directory is missing.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Flush stdout after every line; skipped if stdout has been replaced by an
    # object that does not offer reconfigure().
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(line_buffering=True)

    base, dry_run = parse_args(argv)
    if not os.path.isdir(base):
        print(f"error: registry data directory not found: {base}", file=sys.stderr)
        return 1

    checked, orphaned = remove_orphaned_links(base, dry_run=dry_run)
    mode = "DRY RUN — " if dry_run else ""
    print(f"\n{mode}Checked {checked} layer links, removed {orphaned} orphaned.")
    return 0


if __name__ == "__main__":
    sys.exit(main())