From 9b75b2817b2d7c892d251462d0b966580056c025 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 07:06:50 +0000 Subject: [PATCH] cloud-init: fix k8s node bootstrap snippet (multi-line interp + containerd v2 quotes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs found while rebuilding k8s-node4 (2026-05-26): 1. **runcmd YAML breakage**: `- $${containerd_config_update_command}` interpolated a multi-line heredoc as bare list-item content. The trailing lines lost their list-item prefix, breaking cloud-config parsing. Cloud-init silently fell back to the minimal default (hostname + package_upgrade only) — kubeadm join, containerd config, kubelet tuning, iSCSI hardening, swap, ALL skipped. No error visible in `cloud-init status`. Fix: wrap the interpolation in `- |` literal block with `indent(4, ...)`. 2. **containerd v2 single-quote mismatch**: `containerd config default` in v2 writes `config_path = ''` (single quotes), v1 writes `""` (double). The sed pattern matched only double quotes → silent no-op on fresh containerd 2.x nodes → registry-mirror hosts.toml ignored → all image pulls hit upstream registries → DNS-to-MetalLB chicken-and-egg loop. Fix: match any value with `config_path = .*`. 
Co-Authored-By: Claude Opus 4.7 --- modules/create-template-vm/cloud_init.yaml | 7 ++++++- stacks/infra/main.tf | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/create-template-vm/cloud_init.yaml b/modules/create-template-vm/cloud_init.yaml index 1dc683f6..751b04ec 100644 --- a/modules/create-template-vm/cloud_init.yaml +++ b/modules/create-template-vm/cloud_init.yaml @@ -107,7 +107,12 @@ runcmd: - apt-mark hold containerd containerd.io runc 2>/dev/null || true - systemctl stop kubelet - containerd config default | sudo tee /etc/containerd/config.toml - - ${containerd_config_update_command} + # Multi-line containerd config update — wrapped in `- |` literal block so the + # heredoc content survives YAML rendering. Without this, the multi-line var + # gets inserted as bare top-level lines and breaks the cloud-config parser + # (silent fallback to default — observed 2026-05-26 during node4 rebuild). + - | + ${indent(4, containerd_config_update_command)} - systemctl restart containerd - systemctl enable --now iscsid # Harden iSCSI: increase recovery timeout (300s vs 120s default) and enable diff --git a/stacks/infra/main.tf b/stacks/infra/main.tf index d78f86de..fabca8bd 100644 --- a/stacks/infra/main.tf +++ b/stacks/infra/main.tf @@ -64,8 +64,12 @@ module "k8s-node-template" { snippet_name = local.k8s_cloud_init_snippet_name # Add mirror registry containerd_config_update_command = <<-EOF - # Set up config_path for per-registry mirror configuration - sed -i 's|config_path = ""|config_path = "/etc/containerd/certs.d"|' /etc/containerd/config.toml + # Set up config_path for per-registry mirror configuration. + # NOTE: containerd v2 writes `config_path = ''` (single quotes) on + # `config default`; v1 writes `config_path = ""`. Match both forms so this + # is idempotent across versions. Without the v2 match, hosts.toml mirror + # config is silently ignored — observed 2026-05-26 on node4 (containerd v2.2.4). 
+ sed -i -E 's|^([[:space:]]*)config_path = .*|\1config_path = "/etc/containerd/certs.d"|' /etc/containerd/config.toml # Create hosts.toml for docker.io (Docker Hub) — high traffic, rate-limited mkdir -p /etc/containerd/certs.d/docker.io