From 5cc91e67bf4857ae7d65d2f93ea1646b91a01384 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Tue, 26 May 2026 08:29:26 +0000 Subject: [PATCH] cloud-init: refactor to write_files for multi-line containerd setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moves the containerd_config_update_command interpolation out of the runcmd list and into a write_files block delivering /usr/local/bin/k8s-node-containerd-setup.sh. runcmd then just calls the script. Why: the heredoc in stacks/infra/main.tf has mixed-indent inner shell heredocs (CONTAINERD_GC, KUBELET_PATCH bodies at col 0, surrounding text at col 2). When inserted into a `runcmd: - ${var}` item — even wrapped in a `- |` literal block — YAML's block-indent rule terminates the block early on the col-0 lines. The result is a silent cloud-init parse failure on every new k8s node (observed 2026-05-26 during node4 rebuild — node booted into the minimal default config, no kubeadm join, no containerd tuning, no kubelet shutdown grace). write_files writes the multi-line content into a YAML literal block where the script body is just opaque text — the block's content indent is set by its first content line (col 6, under `content: |`), and any line indented >= 6 is valid content. Relative indentation inside the script is preserved cleanly: lines that were at col 0 (like the `[plugins...]` heredoc bodies) now sit at col 6 via indent(6, ...), and deeper-indented lines stay correspondingly deeper. Verified: `yaml.safe_load()` on the rendered snippet now reports `runcmd=36 write_files=1` (was throwing ParserError before). 
Co-Authored-By: Claude Opus 4.7 --- modules/create-template-vm/cloud_init.yaml | 36 ++++++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/modules/create-template-vm/cloud_init.yaml b/modules/create-template-vm/cloud_init.yaml index b4145aca..2dc68bf0 100644 --- a/modules/create-template-vm/cloud_init.yaml +++ b/modules/create-template-vm/cloud_init.yaml @@ -55,6 +55,26 @@ apt: keyid: "9DC858229FC7DD38854AE2D88D81803C0EBFCD88" filename: docker.list +%{if is_k8s_template} +# write_files delivers the multi-line containerd/kubelet setup script to a +# file BEFORE runcmd executes. This pattern avoids the YAML interpolation bug +# where multi-line $${containerd_config_update_command} (from +# stacks/infra/main.tf — has mixed-indent inner shell heredocs) inserted into +# a single `runcmd: - $${var}` item produces invalid YAML and silently fails +# cloud-init parsing (observed 2026-05-26 during node4 rebuild). With write_files, +# the multi-line content lives in a YAML literal block where ANY indent >= the +# block's content indent is valid — so col-0 heredoc lines like +# `[plugins."io.containerd.gc.v1.scheduler"]` survive cleanly. +write_files: + - path: /usr/local/bin/k8s-node-containerd-setup.sh + permissions: '0755' + owner: root:root + content: | + #!/usr/bin/env bash + set -euo pipefail + ${indent(6, containerd_config_update_command)} +%{endif} + runcmd: # Enable weekly TRIM/discard to reclaim freed blocks in LVM thin pool - systemctl enable --now fstrim.timer @@ -107,16 +127,12 @@ runcmd: - apt-mark hold containerd containerd.io runc 2>/dev/null || true - systemctl stop kubelet - containerd config default | sudo tee /etc/containerd/config.toml - # KNOWN BUG (2026-05-26): the `${containerd_config_update_command}` heredoc in - # stacks/infra/main.tf has lines at mixed indent (most at col 2, inner shell - # heredocs CONTAINERD_GC/KUBELET_PATCH bodies at col 0). 
When interpolated - # into a YAML runcmd item, the rendered output is invalid YAML and cloud-init - # silently falls back to the default minimal config (skips kubeadm join, - # containerd config, kubelet tuning, iSCSI, swap). Properly fixing requires - # refactoring the inner heredocs to write_files or normalising to uniform - # indent. Until then, new k8s nodes must be backfilled manually after first - # boot (see infra/docs/runbooks/k8s-node-bootstrap-backfill.md TODO). - - ${containerd_config_update_command} + # The containerd/kubelet setup is delivered as /usr/local/bin/k8s-node-containerd-setup.sh + # via the write_files: block earlier in this file (just above runcmd:). We run it as a single + # bash invocation here so cloud-init only sees a one-line runcmd item. + # (Previous inline `- $${containerd_config_update_command}` broke YAML parsing + # because the heredoc contains mixed-indent inner shell heredocs.) + - bash /usr/local/bin/k8s-node-containerd-setup.sh - systemctl restart containerd - systemctl enable --now iscsid # Harden iSCSI: increase recovery timeout (300s vs 120s default) and enable