From 23d87d88856e63475fd139dc72c8405b8c672e96 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Thu, 4 Jun 2026 02:01:49 +0000 Subject: [PATCH] cluster-health #20: fix false NFS FAIL on Linux (nc -G is macOS-only) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NFS connectivity check fell through to `nc -z -G 3 192.168.1.127 2049` when `showmount` is absent (the DevVM ships no nfs-common). But `-G` is a macOS/Darwin-only connect-timeout flag — OpenBSD/GNU nc on Linux rejects it with "invalid option -- 'G'", so the elif failed and the check reported "NFS unreachable" on every Linux run even though port 2049 was wide open (confirmed via /dev/tcp). All deployment/PVC/statefulset checks were green throughout — a real PVE NFS outage would have taken down 30+ services. Fix: use the portable `-w` timeout flag, and add a final bash /dev/tcp fallback so the probe is correct even on hosts with neither showmount nor a usable nc. --- scripts/cluster_healthcheck.sh | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scripts/cluster_healthcheck.sh b/scripts/cluster_healthcheck.sh index c3656bae..f415270d 100755 --- a/scripts/cluster_healthcheck.sh +++ b/scripts/cluster_healthcheck.sh @@ -1165,12 +1165,22 @@ check_kyverno() { check_nfs() { section 20 "NFS Connectivity" + # Probe order: showmount (richest — lists exports, needs nfs-common) → + # nc port check → bash /dev/tcp (no external tooling at all). The last + # fallback guarantees correctness on hosts lacking showmount/nc (e.g. the + # DevVM ships neither nfs-common nor a GNU nc). NOTE: the timeout flag is + # `-w` (portable, OpenBSD + GNU + macOS nc); `-G` is macOS-only and errors + # out as "invalid option" on Linux — which previously made this check FAIL + # even when 2049 was wide open. if showmount -e 192.168.1.127 &>/dev/null; then pass "NFS server 192.168.1.127 (Proxmox) reachable (exports listed)" json_add "nfs" "PASS" "NFS reachable" - elif nc -z -G 3 192.168.1.127 2049 &>/dev/null; then + elif nc -z -w 3 192.168.1.127 2049 &>/dev/null; then pass "NFS server 192.168.1.127 port 2049 open" json_add "nfs" "PASS" "NFS port open" + elif timeout 3 bash -c 'exec 3<>/dev/tcp/192.168.1.127/2049' &>/dev/null; then + pass "NFS server 192.168.1.127 port 2049 open (/dev/tcp)" + json_add "nfs" "PASS" "NFS port open" else [[ "$QUIET" == true ]] && section_always 20 "NFS Connectivity" fail "NFS server 192.168.1.127 (Proxmox) unreachable — 30+ services depend on NFS"