[ci skip] Fix registry tag cleanup for pull-through cache

- Rewrite cleanup script to use filesystem deletion (shutil.rmtree)
  since proxy registries don't support DELETE via API (405)
- Fix cron entry to invoke with python3
This commit is contained in:
Viktor Barzin 2026-02-07 22:45:17 +00:00
parent 4671ef34a3
commit 375e3e115a
2 changed files with 21 additions and 45 deletions

View file

@ -281,7 +281,7 @@ module "docker-registry-template" {
format("echo %s | base64 -d > /etc/docker-registry/cleanup-tags.sh && chmod +x /etc/docker-registry/cleanup-tags.sh",
base64encode(file("${path.root}/modules/docker-registry/cleanup-tags.sh"))
),
"( crontab -l 2>/dev/null; echo '0 2 * * * /etc/docker-registry/cleanup-tags.sh 10 >> /var/log/registry-cleanup.log 2>&1' ) | crontab -",
"( crontab -l 2>/dev/null; echo '0 2 * * * python3 /etc/docker-registry/cleanup-tags.sh 10 >> /var/log/registry-cleanup.log 2>&1' ) | crontab -",
]
}

View file

@ -1,72 +1,48 @@
#!/usr/bin/env python3
"""Keeps only the N most recent tags per image in a Docker registry.
Uses filesystem modification times on the tag directories for speed.
Run garbage-collect after this to reclaim disk space."""
"""Keeps only the N most recent tags per image in a pull-through cache registry.
Deletes old tag links directly from the filesystem since the API doesn't support
DELETE on proxy registries. Run garbage-collect after to reclaim blob storage."""
import json
import os
import shutil
import sys
import urllib.request
sys.stdout.reconfigure(line_buffering=True)
REGISTRY = "http://127.0.0.1:5000"
KEEP = int(sys.argv[1]) if len(sys.argv) > 1 else 10
# Registry storage path (docker volume)
STORAGE = "/var/lib/docker/volumes/57b3f1c5fcc7f39c040e17072e10b4536245357d09340206683c04096d30b942/_data/docker/registry/v2/repositories"
def api(path, method="GET", headers=None):
    """Send one request to the registry API and decode the response.

    Args:
        path: URL path appended to REGISTRY (e.g. "/v2/_catalog").
        method: HTTP method to use for the request.
        headers: Optional dict of request headers.

    Returns:
        For HEAD requests, the response headers as a dict. For any other
        method, the JSON-decoded response body, or an empty dict when the
        response body is empty. None when the request or the decoding fails.
    """
    req = urllib.request.Request(f"{REGISTRY}{path}", method=method, headers=headers or {})
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            if method == "HEAD":
                return dict(r.headers)
            body = r.read()
        # An empty body is not valid JSON; treat it as "success, no payload"
        # instead of letting json.loads raise and look like a failed request.
        return json.loads(body) if body.strip() else {}
    except Exception as exc:
        # Best-effort by design: callers treat None as "skip this item".
        # Record the reason so failures are visible in the cron log.
        print(f"{method} {path} failed: {exc}", file=sys.stderr)
        return None
# Get all repos
catalog = api("/v2/_catalog")
if not catalog:
print("Failed to fetch catalog")
sys.exit(1)
total_deleted = 0
for repo in catalog.get("repositories", []):
tags_dir = os.path.join(STORAGE, repo, "_manifests", "tags")
if not os.path.isdir(tags_dir):
for root, dirs, _ in os.walk(STORAGE):
# Look for _manifests/tags directories
if not root.endswith("_manifests/tags"):
continue
# Get tags with their modification times from filesystem
repo = root.replace(STORAGE + "/", "").replace("/_manifests/tags", "")
# Get tags with modification times
tag_times = []
for tag in os.listdir(tags_dir):
tag_path = os.path.join(tags_dir, tag)
for tag in os.listdir(root):
tag_path = os.path.join(root, tag)
if os.path.isdir(tag_path):
mtime = os.path.getmtime(tag_path)
tag_times.append((mtime, tag))
tag_times.append((mtime, tag, tag_path))
if len(tag_times) <= KEEP:
continue
# Sort by mtime descending (newest first), delete everything past KEEP
# Sort by mtime descending (newest first)
tag_times.sort(reverse=True)
to_delete = tag_times[KEEP:]
print(f"[{repo}] has {len(tag_times)} tags, deleting {len(to_delete)}, keeping {KEEP}")
print(f"[{repo}] {len(tag_times)} tags -> keeping {KEEP}, deleting {len(to_delete)}")
for _, tag in to_delete:
headers_resp = api(f"/v2/{repo}/manifests/{tag}", method="HEAD", headers={
"Accept": "application/vnd.docker.distribution.manifest.v2+json"
})
if not headers_resp:
continue
digest = headers_resp.get("Docker-Content-Digest") or headers_resp.get("docker-content-digest")
if digest:
result = api(f"/v2/{repo}/manifests/{digest}", method="DELETE")
total_deleted += 1
for _, tag, tag_path in to_delete:
shutil.rmtree(tag_path)
total_deleted += 1
print(f" deleted {len(to_delete)} tags")
print(f" done")
print(f"\nDone. Deleted {total_deleted} total tags. Run garbage-collect to reclaim disk space.")
print(f"\nDeleted {total_deleted} tags. Restart registry and run garbage-collect to reclaim space.")