From c2f9ca0d130ba9675661073346ce4b05fd0c7224 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Mon, 6 Apr 2026 11:57:55 +0300 Subject: [PATCH] modules: improve create-vm with additional config options and cloud-init updates --- modules/create-template-vm/cloud_init.yaml | 4 + modules/create-vm/main.tf | 234 +++++++++++++++++---- 2 files changed, 195 insertions(+), 43 deletions(-) diff --git a/modules/create-template-vm/cloud_init.yaml b/modules/create-template-vm/cloud_init.yaml index 028af5a2..6e122f9b 100644 --- a/modules/create-template-vm/cloud_init.yaml +++ b/modules/create-template-vm/cloud_init.yaml @@ -61,6 +61,10 @@ runcmd: - sed -i 's/#Storage=auto/Storage=persistent/' /etc/systemd/journald.conf - systemctl restart systemd-journald %{if is_k8s_template} + # Disable unattended-upgrades to prevent unexpected kernel updates that can break containerd/kubelet + # (Root cause of 26h cluster outage: unattended-upgrades → kernel update → containerd failure) + - systemctl disable --now unattended-upgrades || true + - apt-get remove -y unattended-upgrades || true - apt-mark hold kubelet kubeadm kubectl - systemctl stop kubelet - containerd config default | sudo tee /etc/containerd/config.toml diff --git a/modules/create-vm/main.tf b/modules/create-vm/main.tf index 43a7a8e0..b9c3438b 100644 --- a/modules/create-vm/main.tf +++ b/modules/create-vm/main.tf @@ -1,13 +1,30 @@ +# --------------------------------------------------------------------------- +# Variables — Required +# --------------------------------------------------------------------------- + variable "vm_name" { type = string } variable "vmid" { type = number default = 0 } -variable "template_name" { type = string } +variable "cisnippet_name" { + type = string + default = "" +} +variable "bridge" { type = string } + +# --------------------------------------------------------------------------- +# Variables — VM sizing +# --------------------------------------------------------------------------- + variable "vm_cpus" { type = number default = 4 } +variable "cpu_sockets" { + type = number + default = 1 +} variable "vm_mem_mb" { type = number default = 8192 @@ -16,16 +33,19 @@ variable "vm_disk_size" { type = string default = "64G" } +variable "balloon" { + type = number + default = 0 # 0 = disabled (recommended for k8s nodes) +} + +# --------------------------------------------------------------------------- +# Variables — VM identity & networking +# --------------------------------------------------------------------------- + variable "vm_mac_address" { type = string default = null } -variable "cisnippet_name" { type = string } -variable "ssh_keys" { - type = string - default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDHLhYDfyx237eJgOGVoJRECpUS95+7rEBS9vacsIxtx devvm" -} -variable "bridge" { type = string } variable "vlan_tag" { type = string default = null @@ -34,10 +54,90 @@ variable "ipconfig0" { type = string default = "ip=dhcp,ip6=dhcp" } + +# --------------------------------------------------------------------------- +# Variables — Boot & hardware +# --------------------------------------------------------------------------- + +variable "template_name" { + type = string + default = "" # empty = no clone (for importing existing VMs) +} +variable "scsihw" { + type = string + default = "virtio-scsi-pci" +} +variable "boot" { + type = string + default = "order=scsi0" +} +variable "boot_disk" { + type = string + default = "" # e.g., "scsi0" — only set if boot = "c" (legacy) +} +variable "disk_slot" { + type = string + default = "scsi0" # which SCSI slot the OS disk is on +} variable "agent" { type = number default = 1 } +variable "qemu_os" { + type = string + default = "l26" +} +variable "numa" { + type = bool + default = false +} +variable "machine" { + type = string + default = "" # empty = provider default. Use "q35" for GPU passthrough +} + +# --------------------------------------------------------------------------- +# Variables — Startup/shutdown ordering +# --------------------------------------------------------------------------- + +variable "startup_order" { + type = number + default = -1 +} +variable "startup_delay" { + type = number + default = -1 +} +variable "shutdown_timeout" { + type = number + default = -1 +} + +# --------------------------------------------------------------------------- +# Variables — Cloud-Init (optional — disable for non-cloud-init VMs) +# --------------------------------------------------------------------------- + +variable "use_cloud_init" { + type = bool + default = true +} +variable "ssh_keys" { + type = string + default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDHLhYDfyx237eJgOGVoJRECpUS95+7rEBS9vacsIxtx devvm" +} + +# --------------------------------------------------------------------------- +# Variables — GPU / PCI passthrough +# --------------------------------------------------------------------------- + +variable "hostpci0" { + type = string + default = "" # e.g., "0000:06:00.0" for Tesla T4 passthrough +} + +# --------------------------------------------------------------------------- +# Resource +# --------------------------------------------------------------------------- resource "proxmox_vm_qemu" "cloudinit-vm" { vmid = var.vmid @@ -45,60 +145,80 @@ resource "proxmox_vm_qemu" "cloudinit-vm" { target_node = "pve" agent = var.agent memory = var.vm_mem_mb - boot = "order=scsi0" # has to be the same as the OS disk of the template - clone = var.template_name # The name of the template - scsihw = "virtio-scsi-single" + balloon = var.balloon + boot = var.boot + bootdisk = var.boot_disk != "" ? var.boot_disk : null + clone = var.template_name != "" ? var.template_name : null + full_clone = var.template_name != "" ? true : false + scsihw = var.scsihw vm_state = "running" - automatic_reboot = true - os_type = "cloud-init" + automatic_reboot = false # never let Terraform reboot VMs — use /reboot-server skill instead + os_type = var.use_cloud_init ? "cloud-init" : null + machine = var.machine != "" ? var.machine : null - # Cloud-Init configuration - cicustom = "vendor=local:snippets/${var.cisnippet_name}" - ciupgrade = true - nameserver = "1.1.1.1 8.8.8.8" - ipconfig0 = var.ipconfig0 - skip_ipv6 = true - ciuser = "root" - cipassword = "root" - sshkeys = var.ssh_keys - searchdomain = "viktorbarzin.lan" - start_at_node_boot = true # start on node boot - qemu_os = "l26" + # Cloud-Init configuration (only when use_cloud_init = true) + cicustom = var.use_cloud_init && var.cisnippet_name != "" ? "vendor=local:snippets/${var.cisnippet_name}" : null + ciupgrade = var.use_cloud_init ? true : null + nameserver = var.use_cloud_init ? "1.1.1.1 8.8.8.8" : null + ipconfig0 = var.use_cloud_init ? var.ipconfig0 : null + skip_ipv6 = var.use_cloud_init ? true : null + ciuser = var.use_cloud_init ? "root" : null + cipassword = var.use_cloud_init ? "root" : null + sshkeys = var.use_cloud_init ? var.ssh_keys : null + searchdomain = var.use_cloud_init ? "viktorbarzin.lan" : null + + start_at_node_boot = true + qemu_os = var.qemu_os cpu { - cores = var.vm_cpus - type = "host" # emulate host cpu - } - startup_shutdown { - order = -1 - shutdown_timeout = -1 - startup_delay = -1 + cores = var.vm_cpus + sockets = var.cpu_sockets + type = "host" + } + + startup_shutdown { + order = var.startup_order + shutdown_timeout = var.shutdown_timeout + startup_delay = var.startup_delay } - # Most cloud-init images require a serial device for their display serial { id = 0 } + disks { scsi { - scsi0 { - # We have to specify the disk from our template, else Terraform will think it's not supposed to be there - disk { - storage = "local-lvm" - # The size of the disk should be at least as big as the disk in the template. If it's smaller, the disk will be recreated - size = var.vm_disk_size + dynamic "scsi0" { + for_each = var.disk_slot == "scsi0" ? [1] : [] + content { + disk { + storage = "local-lvm" + size = var.vm_disk_size + } + } + } + dynamic "scsi1" { + for_each = var.disk_slot == "scsi1" ? [1] : [] + content { + disk { + storage = "local-lvm" + size = var.vm_disk_size + } } } } - ide { - # Some images require a cloud-init disk on the IDE controller, others on the SCSI or SATA controller - ide1 { - cloudinit { - storage = "local-lvm" + dynamic "ide" { + for_each = var.use_cloud_init ? [1] : [] + content { + ide1 { + cloudinit { + storage = "local-lvm" + } } } } } + network { id = 0 bridge = var.bridge @@ -106,4 +226,32 @@ resource "proxmox_vm_qemu" "cloudinit-vm" { macaddr = var.vm_mac_address tag = var.vlan_tag } + + # Safety: ignore dynamically-attached iSCSI PVC disks (managed by democratic-csi) + # and cloud-init changes that drift after initial provisioning + lifecycle { + prevent_destroy = true + ignore_changes = [ + # democratic-csi dynamically attaches/detaches iSCSI disks + disks[0].scsi[0].scsi1, + disks[0].scsi[0].scsi2, + disks[0].scsi[0].scsi3, + disks[0].scsi[0].scsi4, + disks[0].scsi[0].scsi5, + # cloud-init config may drift after first boot + cicustom, + ciupgrade, + ciuser, + cipassword, + sshkeys, + # SMBIOS UUID and vmgenid are auto-generated + smbios, + # Tags and description may be edited in Proxmox UI + tags, + desc, + # Provider defaults that differ from imported state + define_connection_info, + full_clone, + ] + } }