#!/usr/bin/env bash set -euo pipefail AGENT="nfs-health" KUBECTL="kubectl --kubeconfig /Users/viktorbarzin/code/infra/config" TRUENAS_HOST="10.0.10.15" NODES=("k8s-master:10.0.20.100" "k8s-node1:10.0.20.101" "k8s-node2:10.0.20.102" "k8s-node3:10.0.20.103" "k8s-node4:10.0.20.104") SSH_USER="wizard" DRY_RUN=false for arg in "$@"; do case "$arg" in --dry-run) DRY_RUN=true ;; esac done checks=() add_check() { local name="$1" status="$2" message="$3" checks+=("{\"name\": \"$name\", \"status\": \"$status\", \"message\": \"$message\"}") } check_truenas_reachable() { if $DRY_RUN; then add_check "truenas-reachable" "ok" "dry-run: would ping $TRUENAS_HOST" return fi if timeout 5 ping -c 1 "$TRUENAS_HOST" &>/dev/null; then add_check "truenas-reachable" "ok" "TrueNAS at $TRUENAS_HOST is reachable" else add_check "truenas-reachable" "fail" "TrueNAS at $TRUENAS_HOST is unreachable" fi } check_truenas_nfs_service() { if $DRY_RUN; then add_check "truenas-nfs-service" "ok" "dry-run: would check NFS service on TrueNAS" return fi local result if result=$(timeout 10 ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@$TRUENAS_HOST" \ "service nfs-server status 2>/dev/null || systemctl is-active nfs-server 2>/dev/null || echo 'unknown'" 2>/dev/null); then if echo "$result" | grep -qiE "running|active|is running"; then add_check "truenas-nfs-service" "ok" "NFS service is running on TrueNAS" else add_check "truenas-nfs-service" "warn" "NFS service status unclear: $(echo "$result" | head -1 | tr '"' "'")" fi else add_check "truenas-nfs-service" "fail" "Could not check NFS service on TrueNAS via SSH" fi } check_node_nfs_mounts() { local node_name="$1" node_ip="$2" if $DRY_RUN; then add_check "nfs-mounts-$node_name" "ok" "dry-run: would check NFS mounts on $node_name ($node_ip)" return fi local mount_output if ! mount_output=$(timeout 15 ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$SSH_USER@$node_ip" \ "mount | grep nfs" 2>/dev/null); then add_check "nfs-mounts-$node_name" "warn" "No NFS mounts found or SSH failed on $node_name ($node_ip)" return fi if [ -z "$mount_output" ]; then add_check "nfs-mounts-$node_name" "warn" "No NFS mounts found on $node_name" return fi local mount_count mount_count=$(echo "$mount_output" | wc -l | tr -d ' ') # Check for stale mounts by trying to stat each mount point local stale_count=0 local stale_mounts="" while IFS= read -r line; do local mount_point mount_point=$(echo "$line" | awk '{print $3}') if [ -n "$mount_point" ]; then if ! timeout 10 ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$SSH_USER@$node_ip" \ "timeout 5 stat '$mount_point' >/dev/null 2>&1" 2>/dev/null; then stale_count=$((stale_count + 1)) stale_mounts="$stale_mounts $mount_point" fi fi done <<< "$mount_output" if [ "$stale_count" -gt 0 ]; then add_check "nfs-mounts-$node_name" "fail" "$stale_count/$mount_count NFS mounts stale on $node_name:$stale_mounts" else add_check "nfs-mounts-$node_name" "ok" "$mount_count NFS mounts healthy on $node_name" fi } check_nfs_pvcs() { if $DRY_RUN; then add_check "nfs-pvcs" "ok" "dry-run: would check NFS-backed PVCs" return fi local pending pending=$($KUBECTL get pvc --all-namespaces --field-selector='status.phase!=Bound' -o json 2>/dev/null | \ python3 -c "import sys,json; items=json.load(sys.stdin).get('items',[]); nfs=[i for i in items if 'nfs' in json.dumps(i).lower()]; print(len(nfs))" 2>/dev/null || echo "error") if [ "$pending" = "error" ]; then add_check "nfs-pvcs" "warn" "Could not check NFS PVC status" elif [ "$pending" = "0" ]; then add_check "nfs-pvcs" "ok" "All NFS-backed PVCs are bound" else add_check "nfs-pvcs" "fail" "$pending NFS-backed PVCs are not bound" fi } # Run checks check_truenas_reachable check_truenas_nfs_service for node_entry in "${NODES[@]}"; do node_name="${node_entry%%:*}" node_ip="${node_entry##*:}" check_node_nfs_mounts "$node_name" "$node_ip" done check_nfs_pvcs # Determine overall status overall="ok" for c in "${checks[@]}"; do if echo "$c" | grep -q '"status": "fail"'; then overall="fail" break elif echo "$c" | grep -q '"status": "warn"'; then overall="warn" fi done # Output JSON checks_json=$(IFS=,; echo "${checks[*]}") cat <