From 51456a96f66f5981f58382f2001083fbe0ec42d9 Mon Sep 17 00:00:00 2001 From: Viktor Barzin Date: Fri, 5 Jun 2026 11:10:27 +0000 Subject: [PATCH] fan-control: estimate + expose fan power (fan_watts_est) The iDRAC reports only total DCMI watts + RPM (no per-fan power), so add a cube-law fan-power estimate: fan_W ~= 0.0205*(RPM/1000)^3, calibrated to the 2026-06-05 sweep (fits within ~3W; ~2W floor -> ~99W full). The daemon reads live RPM each loop and pushes pve_fan_control_fan_rpm + _fan_watts_est. Surfaced in HA as sensor.r730_fan_power_est + a "Fan Power (est)" card on the dashboard-it Server view, next to total power. 46 bash tests green; verified live (9120rpm -> ~15W est). [ci skip] Co-Authored-By: Claude Opus 4.8 --- .../2026-06-04-pve-fan-control-design.md | 17 +++++++++++++---- docs/runbooks/fan-control.md | 5 ++++- scripts/fan-control.sh | 19 +++++++++++++++++-- scripts/test-fan-control.sh | 7 +++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/docs/plans/2026-06-04-pve-fan-control-design.md b/docs/plans/2026-06-04-pve-fan-control-design.md index 6e140330..a4eb8728 100644 --- a/docs/plans/2026-06-04-pve-fan-control-design.md +++ b/docs/plans/2026-06-04-pve-fan-control-design.md @@ -103,10 +103,19 @@ Manual fan mode bypasses the iDRAC's own protection, so it is backstopped: ## Observability -Pushes to the existing Pushgateway (`http://10.0.20.100:30091`, job -`fan_control`): `pve_fan_control_cpu_temp_celsius`, `_fan_percent`, `_mode` -(1 quiet / 2 cool / 0 fallback), `_ha_reachable`, `_fallback`. The existing CPU- -temp alert is unaffected. +Pushes to the Pushgateway (`http://10.0.20.100:30091`, job `fan_control`): +`pve_fan_control_cpu_temp_celsius`, `_fan_percent`, `_mode` (1 quiet / 2 cool / +3 manual / 0 fallback), `_ha_reachable`, `_fallback`, `_fan_rpm`, and +`_fan_watts_est`. + +**Fan power is ESTIMATED** — the iDRAC exposes only total DCMI watts + RPM (no +per-fan power), so `_fan_watts_est` models it from RPM via the fan affinity law +(power ∝ RPM³), calibrated to the 2026-06-05 sweep: `fan_W ≈ 0.0205·(RPM/1000)³` +(≈2 W at the floor → ~99 W at full; fits the sweep within ~3 W). Surfaced in HA +as `sensor.r730_fan_power_est` + a "Fan Power (est)" card on the dashboard-it +Server view, next to total power (`sensor.r730_power_consumption`, redfish) — so +the fan tax of the control curve is visible. The existing CPU-temp alert is +unaffected. ## Testing diff --git a/docs/runbooks/fan-control.md b/docs/runbooks/fan-control.md index 9c4df935..4cd535a7 100644 --- a/docs/runbooks/fan-control.md +++ b/docs/runbooks/fan-control.md @@ -24,7 +24,10 @@ can't run the fans wrong indefinitely. `CEILING` (83 °C) still overrides everything → Dell auto. An HA change is applied within one daemon loop (~15 s). Monitoring sensors on the same view: `sensor.r730_fan_speed` (redfish exporter), -`sensor.r730_fan_control_target` + `sensor.r730_fan_control_mode` (Pushgateway). +`sensor.r730_fan_control_target` + `sensor.r730_fan_control_mode` + +`sensor.r730_fan_power_est` (Pushgateway). `r730_fan_power_est` is an ESTIMATE of +total fan power (the iDRAC reports no per-fan power) — modelled from RPM via the +fan affinity law (∝ RPM³), calibrated to the power sweep (~2 W floor → ~99 W full). The HA objects (helpers, the auto-revert automation, the REST sensors in `rest_resources/{idrac_redfish_exporter,fan_control}.yaml`, and the dashboard diff --git a/scripts/fan-control.sh b/scripts/fan-control.sh index e60ae1e6..07d16fa5 100644 --- a/scripts/fan-control.sh +++ b/scripts/fan-control.sh @@ -109,6 +109,12 @@ fc_pct_to_hex() { printf '0x%02x' "$1"; } # fc_clamp -> 0..100 fc_clamp() { local p="$1"; (( p < 0 )) && p=0; (( p > 100 )) && p=100; echo "$p"; } +# fc_fan_watts -> estimated TOTAL fan power (W). The iDRAC reports only +# total DCMI watts + RPM (no per-fan power), so this is a MODEL: fan power ∝ RPM³ +# (fan affinity law), calibrated to the 2026-06-05 power sweep — fits within ~3W +# (~2W @4800rpm · ~17W @9360 · ~42W @12720 · ~99W @16920). Integer: 0.0205·(rpm/1e3)³. +fc_fan_watts() { echo $(( $1 * $1 * $1 * 205 / 10000000000000 )); } + # fc_resolve -> pct # HA mode resolution (the hard ceiling is handled by the caller): # manual -> clamp(manual_pct), no hysteresis @@ -145,6 +151,10 @@ read_cpu_temp() { fc_parse_temp "$("$IPMITOOL" sdr type temperature 2>/dev/null | grep -E '^Temp ' | head -1)" } +read_fan_rpm() { # Fan1 RPM — representative (all 6 fans are set together) + "$IPMITOOL" sdr type fan 2>/dev/null | awk -F'|' '/^Fan1/{gsub(/[^0-9]/,"",$5); print $5+0; exit}' +} + presence_cache="cool"; presence_ts=0 get_presence() { local now; now="$(date +%s)" @@ -171,7 +181,7 @@ ha_entity_state() { fc_json_str_field "$resp" state } -push_metrics() { # +push_metrics() { # [fan_rpm] [fan_watts_est] [[ -z "$PUSHGATEWAY_URL" ]] && return 0 local mode_num; case "$3" in quiet) mode_num=1;; cool) mode_num=2;; manual) mode_num=3;; *) mode_num=0;; esac curl -fsS --max-time 5 --data-binary @- \ @@ -186,6 +196,10 @@ pve_fan_control_mode $mode_num pve_fan_control_ha_reachable $4 # TYPE pve_fan_control_fallback gauge pve_fan_control_fallback $5 +# TYPE pve_fan_control_fan_rpm gauge +pve_fan_control_fan_rpm ${6:-0} +# TYPE pve_fan_control_fan_watts_est gauge +pve_fan_control_fan_watts_est ${7:-0} EOF } @@ -239,7 +253,8 @@ main() { if set_manual "$pct"; then log "temp=${temp}C ha_mode=${ha_mode} eff=${eff} fan=${pct}% (was ${current}%)"; current="$pct" else log "WARN set_manual ${pct}% failed"; fi fi - push_metrics "$temp" "$current" "$eff" "$ha_ok" 0 + local rpm fan_w; rpm="$(read_fan_rpm)"; rpm="${rpm:-0}"; fan_w="$(fc_fan_watts "$rpm")" + push_metrics "$temp" "$current" "$eff" "$ha_ok" 0 "$rpm" "$fan_w" (( RUN_ONCE == 1 )) && break || sleep "$LOOP_INTERVAL" done } diff --git a/scripts/test-fan-control.sh b/scripts/test-fan-control.sh index 660a4147..a42e24a9 100644 --- a/scripts/test-fan-control.sh +++ b/scripts/test-fan-control.sh @@ -52,6 +52,13 @@ eq "resolve quiet=quiet curve" 73 "$(fc_resolve quiet 78 0 cool -1 3)" eq "resolve auto+empty=cool" 51 "$(fc_resolve auto 60 0 cool -1 3)" eq "resolve auto+present=quiet" 31 "$(fc_resolve auto 70 0 quiet -1 3)" +# --- fc_fan_watts: estimated fan power from RPM (cube-law, calibrated to the sweep) --- +eq "fan_watts 0" 0 "$(fc_fan_watts 0)" +eq "fan_watts 4800" 2 "$(fc_fan_watts 4800)" +eq "fan_watts 9360" 16 "$(fc_fan_watts 9360)" +eq "fan_watts 12720" 42 "$(fc_fan_watts 12720)" +eq "fan_watts 16920" 99 "$(fc_fan_watts 16920)" + # --- presence --- now=1000000 eq "presence open -> quiet" quiet "$(fc_presence_mode Отворена 0 $now 900 Отворена)"