diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index fbf18f5f..4bccf76e 100755 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -330,7 +330,14 @@ jellyfin, jellyseerr, tdarr, affine, health, family - Kubernetes cluster with GPU node (5 nodes: k8s-master + k8s-node1-4, running v1.34.2) - NFS server at 10.0.10.15 for storage - Redis shared service at `redis.redis.svc.cluster.local` -- Docker registry at 10.0.20.10 +- Docker registry pull-through cache at 10.0.20.10 (static IP via cloud-init) + - Port 5000: docker.io (Docker Hub, with auth) + - Port 5010: ghcr.io + - Port 5020: quay.io + - Port 5030: registry.k8s.io + - Port 5040: reg.kyverno.io + - Worker nodes use `config_path = "/etc/containerd/certs.d"` with per-registry `hosts.toml` files + - k8s-master does NOT use pull-through cache (containerd 1.6.x incompatibility with config_path + mirrors) ### Proxmox Host Hardware - **CPU**: Intel Xeon E5-2699 v4 @ 2.20GHz (22 cores / 44 threads, single socket) diff --git a/main.tf b/main.tf index c0a08a70..a953499c 100644 --- a/main.tf +++ b/main.tf @@ -210,8 +210,28 @@ module "k8s-node-template" { snippet_name = local.k8s_cloud_init_snippet_name # Add mirror registry containerd_config_update_command = <<-EOF - # BELOW IS DEPRECATED - replace with config_path version!!!! 
- echo '[plugins.\"io.containerd.grpc.v1.cri\".registry.mirrors.\"*\"]' >> /etc/containerd/config.toml && echo ' endpoint = [\"http://10.0.20.10:5000\"]' >> /etc/containerd/config.toml # docker registry vm + # Set up config_path for per-registry mirror configuration + sed -i 's|config_path = ""|config_path = "/etc/containerd/certs.d"|' /etc/containerd/config.toml + + # Create hosts.toml for docker.io (Docker Hub) + mkdir -p /etc/containerd/certs.d/docker.io + printf 'server = "https://registry-1.docker.io"\n\n[host."http://10.0.20.10:5000"]\n capabilities = ["pull", "resolve"]\n' > /etc/containerd/certs.d/docker.io/hosts.toml + + # Create hosts.toml for ghcr.io + mkdir -p /etc/containerd/certs.d/ghcr.io + printf 'server = "https://ghcr.io"\n\n[host."http://10.0.20.10:5010"]\n capabilities = ["pull", "resolve"]\n' > /etc/containerd/certs.d/ghcr.io/hosts.toml + + # Create hosts.toml for quay.io + mkdir -p /etc/containerd/certs.d/quay.io + printf 'server = "https://quay.io"\n\n[host."http://10.0.20.10:5020"]\n capabilities = ["pull", "resolve"]\n' > /etc/containerd/certs.d/quay.io/hosts.toml + + # Create hosts.toml for registry.k8s.io + mkdir -p /etc/containerd/certs.d/registry.k8s.io + printf 'server = "https://registry.k8s.io"\n\n[host."http://10.0.20.10:5030"]\n capabilities = ["pull", "resolve"]\n' > /etc/containerd/certs.d/registry.k8s.io/hosts.toml + + # Create hosts.toml for reg.kyverno.io + mkdir -p /etc/containerd/certs.d/reg.kyverno.io + printf 'server = "https://reg.kyverno.io"\n\n[host."http://10.0.20.10:5040"]\n capabilities = ["pull", "resolve"]\n' > /etc/containerd/certs.d/reg.kyverno.io/hosts.toml sed -i 's/.*max_concurrent_downloads = 3/max_concurrent_downloads = 20/g' /etc/containerd/config.toml # Enable multiple concurrent downloads sudo sed -i '/serializeImagePulls:/d' /var/lib/kubelet/config.yaml && \ @@ -270,8 +290,56 @@ module "docker-registry-template" { ) ), "( crontab -l 2>/dev/null; echo '0 3 * * 0 /usr/bin/docker exec registry registry 
garbage-collect -m /etc/docker/registry/config.yml' ) | crontab -", - "( crontab -l 2>/dev/null; echo '0 * * * * /usr/bin/docker restart registry' ) | crontab -", + "( crontab -l 2>/dev/null; echo '0 * * * * /usr/bin/docker restart registry registry-ghcr registry-quay registry-k8s registry-kyverno' ) | crontab -", "docker run -p 5000:5000 -p 5001:5001 -d --restart always --name registry -v /etc/docker-registry/config.yml:/etc/docker/registry/config.yml registry:2", + # ghcr.io proxy + "mkdir -p /etc/docker-registry/ghcr", + format("echo %s | base64 -d > /etc/docker-registry/ghcr/config.yml", + base64encode( + templatefile("./modules/docker-registry/config-proxy.yaml.tpl", { + name = "ghcr" + remote_url = "https://ghcr.io" + }) + ) + ), + "docker run -p 5010:5000 -d --restart always --name registry-ghcr -v /etc/docker-registry/ghcr/config.yml:/etc/docker/registry/config.yml registry:2", + "( crontab -l 2>/dev/null; echo '5 3 * * 0 /usr/bin/docker exec registry-ghcr registry garbage-collect -m /etc/docker/registry/config.yml' ) | crontab -", + # quay.io proxy + "mkdir -p /etc/docker-registry/quay", + format("echo %s | base64 -d > /etc/docker-registry/quay/config.yml", + base64encode( + templatefile("./modules/docker-registry/config-proxy.yaml.tpl", { + name = "quay" + remote_url = "https://quay.io" + }) + ) + ), + "docker run -p 5020:5000 -d --restart always --name registry-quay -v /etc/docker-registry/quay/config.yml:/etc/docker/registry/config.yml registry:2", + "( crontab -l 2>/dev/null; echo '10 3 * * 0 /usr/bin/docker exec registry-quay registry garbage-collect -m /etc/docker/registry/config.yml' ) | crontab -", + # registry.k8s.io proxy + "mkdir -p /etc/docker-registry/k8s", + format("echo %s | base64 -d > /etc/docker-registry/k8s/config.yml", + base64encode( + templatefile("./modules/docker-registry/config-proxy.yaml.tpl", { + name = "k8s" + remote_url = "https://registry.k8s.io" + }) + ) + ), + "docker run -p 5030:5000 -d --restart always --name registry-k8s 
-v /etc/docker-registry/k8s/config.yml:/etc/docker/registry/config.yml registry:2", + "( crontab -l 2>/dev/null; echo '15 3 * * 0 /usr/bin/docker exec registry-k8s registry garbage-collect -m /etc/docker/registry/config.yml' ) | crontab -", + # reg.kyverno.io proxy + "mkdir -p /etc/docker-registry/kyverno", + format("echo %s | base64 -d > /etc/docker-registry/kyverno/config.yml", + base64encode( + templatefile("./modules/docker-registry/config-proxy.yaml.tpl", { + name = "kyverno" + remote_url = "https://reg.kyverno.io" + }) + ) + ), + "docker run -p 5040:5000 -d --restart always --name registry-kyverno -v /etc/docker-registry/kyverno/config.yml:/etc/docker/registry/config.yml registry:2", + "( crontab -l 2>/dev/null; echo '20 3 * * 0 /usr/bin/docker exec registry-kyverno registry garbage-collect -m /etc/docker/registry/config.yml' ) | crontab -", # Setup the registry nginx config; We want clients to connect via the nginx to serialize requests for the same blobs # Otherwise race conditions lead to corrupt blobs "mkdir -p /var/cache/nginx/registry", @@ -305,10 +373,15 @@ module "docker-registry-vm" { vm_mac_address = "DE:AD:BE:EF:22:22" # mapped to 10.0.20.10 in dhcp bridge = "vmbr1" vlan_tag = "20" + ipconfig0 = "ip=10.0.20.10/24,gw=10.0.20.1" # ports: - # 5000 -> registry + # 5000 -> registry (docker.io proxy) # 5001 -> metrics # 5002 -> nginx proxy <-- use this to prevent races on the same blobs + # 5010 -> registry-ghcr (ghcr.io proxy) + # 5020 -> registry-quay (quay.io proxy) + # 5030 -> registry-k8s (registry.k8s.io proxy) + # 5040 -> registry-kyverno (reg.kyverno.io proxy) # 8080 -> registry-ui (joxit/docker-registry-ui) } diff --git a/modules/create-vm/main.tf b/modules/create-vm/main.tf index 219d41c2..b7e451df 100644 --- a/modules/create-vm/main.tf +++ b/modules/create-vm/main.tf @@ -30,6 +30,10 @@ variable "vlan_tag" { type = string default = null } +variable "ipconfig0" { + type = string + default = "ip=dhcp,ip6=dhcp" +} resource "proxmox_vm_qemu" 
"cloudinit-vm" { vmid = var.vmid @@ -48,7 +52,7 @@ resource "proxmox_vm_qemu" "cloudinit-vm" { cicustom = "vendor=local:snippets/${var.cisnippet_name}" ciupgrade = true nameserver = "1.1.1.1 8.8.8.8" - ipconfig0 = "ip=dhcp,ip6=dhcp" + ipconfig0 = var.ipconfig0 skip_ipv6 = true ciuser = "root" cipassword = "root" diff --git a/modules/docker-registry/config-proxy.yaml.tpl b/modules/docker-registry/config-proxy.yaml.tpl new file mode 100644 index 00000000..018107c3 --- /dev/null +++ b/modules/docker-registry/config-proxy.yaml.tpl @@ -0,0 +1,29 @@ +version: 0.1 +log: + fields: + service: registry-${name} +storage: + cache: + blobdescriptor: inmemory + filesystem: + rootdirectory: /var/lib/registry + maxsize: 5GiB + delete: + enabled: true + maintenance: + uploadpurging: + enabled: true + age: 24h + interval: 4h + dryrun: false +http: + addr: :5000 + headers: + X-Content-Type-Options: [nosniff] +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 +proxy: + remoteurl: ${remote_url} diff --git a/scripts/setup_containerd_mirrors.sh b/scripts/setup_containerd_mirrors.sh new file mode 100755 index 00000000..f0101b32 --- /dev/null +++ b/scripts/setup_containerd_mirrors.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# setup_containerd_mirrors.sh +# Replaces deprecated wildcard registry mirror with per-registry hosts.toml config. 
+# Run on each K8s WORKER node: ssh wizard@<worker-node> 'sudo bash -s' < scripts/setup_containerd_mirrors.sh +# NOTE: Do NOT run on k8s-master (containerd 1.6.x has conflicts with config_path + mirrors coexisting) + +set -euo pipefail + +TIMESTAMP=$(date +%s) +CONFIG="/etc/containerd/config.toml" +CERTS_DIR="/etc/containerd/certs.d" + +echo "=== Backing up containerd config ===" +cp "$CONFIG" "${CONFIG}.bak.${TIMESTAMP}" + +echo "=== Removing deprecated mirror entries ===" +# Remove wildcard mirror and its endpoint +sed -i '/\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.mirrors\."\*"\]/d' "$CONFIG" +sed -i '/endpoint = \["http:\/\/10\.0\.20\.10:5000"\]/d' "$CONFIG" +# Remove any other per-registry mirror sections (e.g. docker.io) to avoid config_path conflict +sed -i '/\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.mirrors\."docker\.io"\]/d' "$CONFIG" +sed -i '/endpoint = \["https:\/\/registry-1\.docker\.io"\]/d' "$CONFIG" +# Remove the mirrors parent section header if it's now empty +sed -i '/\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\.mirrors\]$/d' "$CONFIG" + +echo "=== Setting config_path ===" +# Replace empty config_path with certs.d path +if grep -q 'config_path = ""' "$CONFIG"; then + sed -i 's|config_path = ""|config_path = "/etc/containerd/certs.d"|' "$CONFIG" +elif grep -q 'config_path = "/etc/containerd/certs.d"' "$CONFIG"; then + echo "config_path already set, skipping" +else + # If config_path line doesn't exist at all, add it under [plugins."io.containerd.grpc.v1.cri".registry] + sed -i '/\[plugins\."io\.containerd\.grpc\.v1\.cri"\.registry\]/a\ config_path = "/etc/containerd/certs.d"' "$CONFIG" +fi + +echo "=== Creating hosts.toml files ===" + +# docker.io (Docker Hub) +mkdir -p "$CERTS_DIR/docker.io" +printf 'server = "https://registry-1.docker.io"\n\n[host."http://10.0.20.10:5000"]\n capabilities = ["pull", "resolve"]\n' > "$CERTS_DIR/docker.io/hosts.toml" + +# ghcr.io +mkdir -p "$CERTS_DIR/ghcr.io" +printf 'server = 
"https://ghcr.io"\n\n[host."http://10.0.20.10:5010"]\n capabilities = ["pull", "resolve"]\n' > "$CERTS_DIR/ghcr.io/hosts.toml" + +# quay.io +mkdir -p "$CERTS_DIR/quay.io" +printf 'server = "https://quay.io"\n\n[host."http://10.0.20.10:5020"]\n capabilities = ["pull", "resolve"]\n' > "$CERTS_DIR/quay.io/hosts.toml" + +# registry.k8s.io +mkdir -p "$CERTS_DIR/registry.k8s.io" +printf 'server = "https://registry.k8s.io"\n\n[host."http://10.0.20.10:5030"]\n capabilities = ["pull", "resolve"]\n' > "$CERTS_DIR/registry.k8s.io/hosts.toml" + +# reg.kyverno.io +mkdir -p "$CERTS_DIR/reg.kyverno.io" +printf 'server = "https://reg.kyverno.io"\n\n[host."http://10.0.20.10:5040"]\n capabilities = ["pull", "resolve"]\n' > "$CERTS_DIR/reg.kyverno.io/hosts.toml" + +echo "=== Restarting containerd ===" +systemctl restart containerd + +echo "=== Verifying containerd is running ===" +systemctl is-active containerd + +echo "=== Done ==="