diff --git a/modules/create-template-vm/cloud_init.yaml b/modules/create-template-vm/cloud_init.yaml
index b4145aca..2dc68bf0 100644
--- a/modules/create-template-vm/cloud_init.yaml
+++ b/modules/create-template-vm/cloud_init.yaml
@@ -55,6 +55,26 @@ apt:
       keyid: "9DC858229FC7DD38854AE2D88D81803C0EBFCD88"
       filename: docker.list
 
+%{if is_k8s_template}
+# write_files drops the multi-line containerd/kubelet setup script onto disk
+# BEFORE runcmd executes; runcmd then runs it as a one-line `bash <path>` item.
+# Inlining the multi-line $${containerd_config_update_command} (from
+# stacks/infra/main.tf — has mixed-indent inner shell heredocs) into a single
+# `runcmd: - $${var}` item produced invalid YAML and silently failed
+# cloud-init parsing (observed 2026-05-26 during node4 rebuild).
+# indent(6, ...) pads every line after the first with 6 spaces (the first line
+# takes its 6 from this template), so every rendered line sits at or beyond the
+# literal block's content indent, and col-0 heredoc lines like
+# `[plugins."io.containerd.gc.v1.scheduler"]` come out at col 0 in the script.
+write_files:
+  - path: /usr/local/bin/k8s-node-containerd-setup.sh
+    permissions: '0755'
+    owner: root:root
+    content: |
+      #!/usr/bin/env bash
+      set -euo pipefail
+      ${indent(6, containerd_config_update_command)}
+%{endif}
+
 runcmd:
   # Enable weekly TRIM/discard to reclaim freed blocks in LVM thin pool
   - systemctl enable --now fstrim.timer
@@ -107,16 +127,12 @@ runcmd:
   - apt-mark hold containerd containerd.io runc 2>/dev/null || true
   - systemctl stop kubelet
   - containerd config default | sudo tee /etc/containerd/config.toml
-  # KNOWN BUG (2026-05-26): the `${containerd_config_update_command}` heredoc in
-  # stacks/infra/main.tf has lines at mixed indent (most at col 2, inner shell
-  # heredocs CONTAINERD_GC/KUBELET_PATCH bodies at col 0). When interpolated
-  # into a YAML runcmd item, the rendered output is invalid YAML and cloud-init
-  # silently falls back to the default minimal config (skips kubeadm join,
-  # containerd config, kubelet tuning, iSCSI, swap). Properly fixing requires
-  # refactoring the inner heredocs to write_files or normalising to uniform
-  # indent. Until then, new k8s nodes must be backfilled manually after first
-  # boot (see infra/docs/runbooks/k8s-node-bootstrap-backfill.md TODO).
-  - ${containerd_config_update_command}
+  # The containerd/kubelet setup script is written to
+  # /usr/local/bin/k8s-node-containerd-setup.sh by the write_files: block above
+  # runcmd: in this file; one bash invocation keeps this runcmd item on a single
+  # line. The previous inline `- $${containerd_config_update_command}` item broke
+  # YAML parsing because its inner shell heredocs sit at mixed indent.
+  - bash /usr/local/bin/k8s-node-containerd-setup.sh
   - systemctl restart containerd
   - systemctl enable --now iscsid
   # Harden iSCSI: increase recovery timeout (300s vs 120s default) and enable