#!/usr/bin/env python3
"""Keep only the N most recent tags per image in a pull-through cache registry.

Old tag links are deleted directly from the registry's filesystem storage,
since the API doesn't support DELETE on proxy registries. After a run,
restart the registry and run garbage-collect to reclaim blob storage.

Usage:
    cleanup-tags.sh [KEEP] [STORAGE]

    KEEP     number of newest tags to keep per repository (default: 10)
    STORAGE  path of the registry's ``repositories`` directory
             (default: the module-level ``STORAGE`` constant)

Deployment note: Terraform (``main.tf``, module "docker-registry-template")
installs this file as /etc/docker-registry/cleanup-tags.sh and schedules it
from cron as::

    0 2 * * * python3 /etc/docker-registry/cleanup-tags.sh 10 >> /var/log/registry-cleanup.log 2>&1
"""

import os
import shutil
import sys

# Number of tags kept per repository when no KEEP argument is given.
DEFAULT_KEEP = 10

# Registry storage path (docker volume). The volume id is specific to one
# host; pass STORAGE on the command line to run against another location.
STORAGE = "/var/lib/docker/volumes/57b3f1c5fcc7f39c040e17072e10b4536245357d09340206683c04096d30b942/_data/docker/registry/v2/repositories"


def find_tag_dirs(storage):
    """Yield ``(repo, tags_dir)`` for every ``_manifests/tags`` dir under *storage*.

    *repo* is the repository path relative to *storage*; *tags_dir* is the
    directory whose sub-directories are the individual tags.
    """
    for root, dirs, _files in os.walk(storage):
        dirs.sort()  # deterministic traversal order -> stable log output
        parent, leaf = os.path.split(root)
        # Compare whole path components so that e.g. "foo_manifests/tags"
        # is not mistaken for a tags directory.
        if leaf != "tags" or os.path.basename(parent) != "_manifests":
            continue
        # Do not descend into the tag directories themselves: the caller may
        # delete them while this generator is still being consumed.
        dirs[:] = []
        yield os.path.relpath(os.path.dirname(parent), storage), root


def list_tags(tags_dir):
    """Return ``[(mtime, tag, tag_path), ...]`` for *tags_dir*, newest first.

    Only sub-directories are considered; stray files are ignored.
    """
    # NOTE(review): a directory's mtime changes when entries are added or
    # removed directly inside it, so this may not track a re-pull of an
    # already existing tag -- confirm this is the intended recency signal.
    tags = []
    with os.scandir(tags_dir) as entries:
        for entry in entries:
            if entry.is_dir():
                tags.append((entry.stat().st_mtime, entry.name, entry.path))
    tags.sort(reverse=True)
    return tags


def cleanup(storage, keep):
    """Delete all but the *keep* newest tags of every repository in *storage*.

    A tag that cannot be removed is reported on stderr and skipped, so one
    bad directory does not abort the whole run.

    Returns the number of tag directories actually deleted.
    """
    total_deleted = 0
    for repo, tags_dir in find_tag_dirs(storage):
        tags = list_tags(tags_dir)
        if len(tags) <= keep:
            continue

        to_delete = tags[keep:]
        print(f"[{repo}] {len(tags)} tags -> keeping {keep}, deleting {len(to_delete)}")

        for _mtime, tag, tag_path in to_delete:
            try:
                shutil.rmtree(tag_path)
            except OSError as exc:
                print(f"  failed to delete {tag}: {exc}", file=sys.stderr)
                continue
            total_deleted += 1

        print("  done")
    return total_deleted


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    *argv* is the argument list without the program name and defaults to
    ``sys.argv[1:]``. Exit status is 0 on success, 1 when the storage path
    does not exist, and 2 when KEEP is not a non-negative integer.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    try:
        keep = int(args[0]) if args else DEFAULT_KEEP
    except ValueError:
        print(f"KEEP must be an integer, got {args[0]!r}", file=sys.stderr)
        return 2
    if keep < 0:
        # A negative slice start would select the *oldest* |keep| tags for
        # deletion, which is never what the caller means.
        print(f"KEEP must be >= 0, got {keep}", file=sys.stderr)
        return 2

    storage = args[1] if len(args) > 1 else STORAGE
    if not os.path.isdir(storage):
        print(f"Registry storage path not found: {storage}", file=sys.stderr)
        return 1

    # Flush each line immediately so the cron log is written as work proceeds.
    sys.stdout.reconfigure(line_buffering=True)

    total_deleted = cleanup(storage, keep)
    print(f"\nDeleted {total_deleted} tags. Restart registry and run garbage-collect to reclaim space.")
    return 0


if __name__ == "__main__":
    sys.exit(main())