add registry blob integrity checker to self-heal corrupted cache
The cleanup-tags.sh + garbage-collect cycle can delete blob data while leaving _layers/ link files intact. The registry then returns HTTP 200 with 0 bytes for those layers, causing "unexpected EOF" on image pulls. fix-broken-blobs.sh walks all repositories, checks each layer link against actual blob data, and removes orphaned links so the registry re-fetches from upstream on next pull. Schedule: daily at 2:30am (after tag cleanup) and Sunday 3:30am (after garbage collection). First run found 2335/2556 (91%) of layer links were orphaned.
This commit is contained in:
parent
facf959ecf
commit
dd461beb33
2 changed files with 66 additions and 0 deletions
59
modules/docker-registry/fix-broken-blobs.sh
Normal file
59
modules/docker-registry/fix-broken-blobs.sh
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/env python3
"""Find and remove layer links that point to non-existent blobs.

When the cleanup-tags.sh + garbage-collect cycle runs, it can delete blob data
while leaving _layers/ link files intact. The registry then returns HTTP 200
with 0 bytes for those layers (it finds the link, trusts the blob exists, but
the data is gone). This causes containerd to fail with "unexpected EOF".

This script walks all repositories, checks each layer link against the actual
blobs directory, and removes any orphaned links. On next pull, the registry
will re-fetch the missing blobs from the upstream registry.

Usage:
    fix-broken-blobs.sh [BASE] [--dry-run]

BASE is the directory holding one sub-directory per registry (default:
/opt/registry/data). With --dry-run, orphaned links are reported but nothing
is deleted. The flag may appear before or after BASE.

Run after garbage-collect has finished (e.g., Sunday 3:30 AM) or daily.
"""

import os
import shutil
import sys

DEFAULT_BASE = "/opt/registry/data"
DRY_RUN_FLAG = "--dry-run"
LAYERS_SUFFIX = "/_layers/sha256"


def parse_args(argv):
    """Split command-line arguments into ``(base, dry_run)``.

    Args:
        argv: Arguments without the program name (i.e. ``sys.argv[1:]``).

    Returns:
        A tuple ``(base, dry_run)`` where ``base`` is the first argument that
        is not ``--dry-run`` (or the default data directory when there is
        none) and ``dry_run`` tells whether ``--dry-run`` was given.
    """
    dry_run = DRY_RUN_FLAG in argv
    # The flag must never be mistaken for the data directory, whatever its
    # position on the command line.
    positional = [arg for arg in argv if arg != DRY_RUN_FLAG]
    base = positional[0] if positional else DEFAULT_BASE
    return base, dry_run


def fix_broken_blobs(base, dry_run=False):
    """Remove layer links under ``base`` whose blob data file is missing.

    Args:
        base: Directory containing one sub-directory per registry; each is
            expected to hold ``docker/registry/v2/{repositories,blobs}``.
            Sub-directories without a ``repositories`` tree are skipped.
        dry_run: When true, only report orphaned links; delete nothing.

    Returns:
        A tuple ``(checked, orphaned)``: the number of layer links inspected
        and the number found orphaned (removed unless ``dry_run``).

    Raises:
        OSError: If ``base`` cannot be listed or a link directory cannot be
            removed.
    """
    total_checked = 0
    total_removed = 0
    prefix = "[DRY RUN] " if dry_run else ""

    for registry_name in sorted(os.listdir(base)):
        v2_dir = os.path.join(base, registry_name, "docker/registry/v2")
        repos_dir = os.path.join(v2_dir, "repositories")
        blobs_dir = os.path.join(v2_dir, "blobs")

        if not os.path.isdir(repos_dir):
            continue

        for root, dirs, _files in os.walk(repos_dir):
            if not root.endswith(LAYERS_SUFFIX):
                continue

            repo = root.replace(repos_dir + "/", "").replace(LAYERS_SUFFIX, "")

            # Sorted so that log output is stable from run to run.
            for digest_dir in sorted(os.listdir(root)):
                link_file = os.path.join(root, digest_dir, "link")
                if not os.path.isfile(link_file):
                    continue

                total_checked += 1

                # Check if the actual blob data exists.
                blob_data = os.path.join(
                    blobs_dir, "sha256", digest_dir[:2], digest_dir, "data"
                )
                if os.path.isfile(blob_data):
                    continue

                print(
                    f"{prefix}[{registry_name}/{repo}] "
                    f"removing orphaned layer link: {digest_dir[:12]}..."
                )
                if not dry_run:
                    # Remove the entire digest directory (contains the link file).
                    shutil.rmtree(os.path.join(root, digest_dir))
                total_removed += 1

            # Every digest directory was handled above; do not let os.walk
            # descend into them (some of them no longer exist).
            dirs[:] = []

    return total_checked, total_removed


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    if argv is None:
        argv = sys.argv[1:]

    # Flush each line immediately so cron log files stay in order.
    sys.stdout.reconfigure(line_buffering=True)

    base, dry_run = parse_args(argv)
    if not os.path.isdir(base):
        print(f"error: registry data directory not found: {base}", file=sys.stderr)
        return 1

    total_checked, total_removed = fix_broken_blobs(base, dry_run)

    mode = "DRY RUN — " if dry_run else ""
    print(f"\n{mode}Checked {total_checked} layer links, removed {total_removed} orphaned.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
|
|
@ -270,6 +270,10 @@ module "docker-registry-template" {
|
|||
format("echo %s | base64 -d > /opt/registry/cleanup-tags.sh && chmod +x /opt/registry/cleanup-tags.sh",
|
||||
base64encode(file("${path.root}/../../modules/docker-registry/cleanup-tags.sh"))
|
||||
),
|
||||
# Write blob integrity checker
|
||||
format("echo %s | base64 -d > /opt/registry/fix-broken-blobs.sh && chmod +x /opt/registry/fix-broken-blobs.sh",
|
||||
base64encode(file("${path.root}/../../modules/docker-registry/fix-broken-blobs.sh"))
|
||||
),
|
||||
# Create systemd unit for docker compose
|
||||
format("echo %s | base64 -d > /etc/systemd/system/docker-compose-registry.service",
|
||||
base64encode(<<-UNIT
|
||||
|
|
@ -304,6 +308,9 @@ UNIT
|
|||
"( crontab -l 2>/dev/null; echo '25 3 * * 0 /usr/bin/docker exec registry-private registry garbage-collect -m /etc/docker/registry/config.yml >> /var/log/registry-gc.log 2>&1' ) | crontab -",
|
||||
# Cron: tag cleanup (daily 2am, keep last 10 tags per image)
|
||||
"( crontab -l 2>/dev/null; echo '0 2 * * * python3 /opt/registry/cleanup-tags.sh 10 >> /var/log/registry-cleanup.log 2>&1' ) | crontab -",
|
||||
# Cron: blob integrity check (Sunday 3:30am after GC; Mon-Sat 2:30am after tag cleanup)
|
||||
"( crontab -l 2>/dev/null; echo '30 3 * * 0 python3 /opt/registry/fix-broken-blobs.sh >> /var/log/registry-fix-blobs.log 2>&1' ) | crontab -",
|
||||
"( crontab -l 2>/dev/null; echo '30 2 * * 1-6 python3 /opt/registry/fix-broken-blobs.sh >> /var/log/registry-fix-blobs.log 2>&1' ) | crontab -",
|
||||
]
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue