From 3f0ecda737380192ea2d128a782e374735938ed1 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 23 Mar 2026 11:33:06 +0200 Subject: [PATCH] harden pull-through cache: intercept errors, reduce lock timeout, add healthz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add proxy_intercept_errors + error_page for 502/503/504 on blob locations so upstream gateway errors are answered with a fixed 502 from @upstream_error, to prevent caching truncated upstream responses (root cause of repeated ImagePullBackOff across services) - Reduce proxy_cache_lock_timeout and proxy_cache_lock_age from 15m to 5m — fail fast, let containerd retry instead of all concurrent pulls waiting on a failed first download - Add proxy_cache_valid any 0 — never cache non-200 responses - Add /healthz endpoints on Docker Hub and GHCR servers - Add http.draintimeout (60s) and proxy.ttl (168h) to registry proxy configs --- modules/docker-registry/config-proxy.yaml.tpl | 2 + modules/docker-registry/config.yaml | 2 + modules/docker-registry/nginx_registry.conf | 40 +++++++++++++++++-- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/modules/docker-registry/config-proxy.yaml.tpl b/modules/docker-registry/config-proxy.yaml.tpl index 48ecfe2c..dbb9069b 100644 --- a/modules/docker-registry/config-proxy.yaml.tpl +++ b/modules/docker-registry/config-proxy.yaml.tpl @@ -17,6 +17,7 @@ storage: dryrun: false http: addr: :5000 + draintimeout: 60s headers: X-Content-Type-Options: [nosniff] health: @@ -26,3 +27,4 @@ health: threshold: 3 proxy: remoteurl: ${remote_url} + ttl: 168h diff --git a/modules/docker-registry/config.yaml b/modules/docker-registry/config.yaml index 70c5d8c7..f8d84407 100644 --- a/modules/docker-registry/config.yaml +++ b/modules/docker-registry/config.yaml @@ -19,6 +19,7 @@ storage: enabled: false http: addr: :5000 + draintimeout: 60s headers: X-Content-Type-Options: [nosniff] debug: @@ -37,3 +38,4 @@ proxy: remoteurl: https://registry-1.docker.io username: vbarzin@gmail.com password: ${password} + ttl: 168h diff --git 
a/modules/docker-registry/nginx_registry.conf b/modules/docker-registry/nginx_registry.conf index 2f89949e..ec433340 100644 --- a/modules/docker-registry/nginx_registry.conf +++ b/modules/docker-registry/nginx_registry.conf @@ -55,12 +55,17 @@ http { proxy_set_header Host $host; proxy_set_header Connection ""; + # Intercept upstream 502/503/504 and answer from @upstream_error + proxy_intercept_errors on; + error_page 502 503 504 = @upstream_error; + proxy_cache registry; proxy_cache_lock on; - proxy_cache_lock_timeout 15m; - proxy_cache_lock_age 15m; + proxy_cache_lock_timeout 5m; + proxy_cache_lock_age 5m; proxy_cache_use_stale updating; proxy_cache_valid 200 24h; + proxy_cache_valid any 0; proxy_cache_min_uses 2; proxy_cache_methods GET; @@ -81,6 +86,17 @@ http { proxy_send_timeout 900; } + location @upstream_error { + return 502 "upstream error"; + } + + location /healthz { + proxy_pass http://dockerhub/v2/; + proxy_read_timeout 5s; + proxy_connect_timeout 3s; + access_log off; + } + location / { return 200 'ok'; add_header Content-Type text/plain; @@ -104,12 +120,17 @@ http { proxy_set_header Host $host; proxy_set_header Connection ""; + # Intercept upstream 502/503/504 and answer from @upstream_error + proxy_intercept_errors on; + error_page 502 503 504 = @upstream_error; + proxy_cache registry; proxy_cache_lock on; - proxy_cache_lock_timeout 15m; - proxy_cache_lock_age 15m; + proxy_cache_lock_timeout 5m; + proxy_cache_lock_age 5m; proxy_cache_use_stale updating; proxy_cache_valid 200 24h; + proxy_cache_valid any 0; proxy_cache_min_uses 2; proxy_cache_methods GET; @@ -130,6 +151,17 @@ http { proxy_send_timeout 900; } + location @upstream_error { + return 502 "upstream error"; + } + + location /healthz { + proxy_pass http://ghcr/v2/; + proxy_read_timeout 5s; + proxy_connect_timeout 3s; + access_log off; + } + location / { return 200 'ok'; add_header Content-Type text/plain;