fan-control: continuous linear curve (replaces discrete step-bands)
Replace the step-band fan curve with a continuous linear ramp — the bands flapped at edges (e.g. 45<->65%). Web-researched: linear + 2-3C hysteresis is the homelab standard; PID is overkill for this slow thermal loop. fan% now interpolates between env-tunable anchors: COOL 50C/30% -> 83C/100% (~2.1%/C; ~51% at the ~60C equilibrium); QUIET 68C/20% -> 83C/100% (near-silent until ~70C). Both reach 100% at the 83C ceiling. Anti-oscillation: asymmetric hysteresis (fc_decide) + a MIN_STEP (3%) minimum-change threshold. 41 bash tests green; deployed + verified live (59C -> 49%, smooth). [ci skip] Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
945c1936e3
commit
324f2dc3bf
4 changed files with 67 additions and 62 deletions
|
|
@ -47,28 +47,31 @@ set -uo pipefail
|
|||
: "${DRY_RUN:=0}" # 1 => log IPMI actions instead of executing
|
||||
: "${RUN_ONCE:=0}" # 1 => one iteration then exit (testing)
|
||||
|
||||
# Curves as "min_temp:pct" entries, descending; first whose min_temp <= temp wins.
|
||||
# COOL is power-tuned (2026-06-05 power/temp sweep): the cooling-per-watt knee is
|
||||
# ~60% — beyond it airflow buys almost nothing (60->70% = +21W/-2°C, 70->100% =
|
||||
# +54W/0°C; the CPU floors ~59°C at cluster load). So the normal band caps at 60%
|
||||
# (~303W, ~61°C); 80/100% are a high-load safety ramp before the 83°C ceiling.
|
||||
COOL_CURVE=(79:100 73:80 64:60 55:50 0:30)
|
||||
QUIET_CURVE=(82:100 78:65 73:40 0:20)
|
||||
# Continuous LINEAR fan curve (2026-06-05): fan% ramps proportionally with CPU
|
||||
# temp between (T_LO,P_LO) and (T_HI,P_HI), clamped flat outside. Replaces the old
|
||||
# discrete step-bands (which flapped at band edges — e.g. 45<->65%). Both modes
|
||||
# reach 100% right at the 83°C ceiling. Anchors are env-tunable.
|
||||
# COOL (garage empty): 30% @50°C .. 100% @83°C (~2.1%/°C; equilibrium ~60°C/~51%)
|
||||
# QUIET (someone there): 20% @68°C .. 100% @83°C (near-silent until ~70°C)
|
||||
# Web-researched: a linear curve + 2-3°C hysteresis is the homelab standard; PID is
|
||||
# overkill for this slow thermal loop. See docs/plans/2026-06-04-pve-fan-control-design.md.
|
||||
: "${COOL_T_LO:=50}"; : "${COOL_P_LO:=30}"; : "${COOL_T_HI:=83}"; : "${COOL_P_HI:=100}"
|
||||
: "${QUIET_T_LO:=68}"; : "${QUIET_P_LO:=20}"; : "${QUIET_T_HI:=83}"; : "${QUIET_P_HI:=100}"
|
||||
: "${MIN_STEP:=3}" # min fan-% change worth an IPMI write (anti-jitter on the smooth curve)
|
||||
|
||||
log() { printf '%s %s\n' "$(date '+%Y-%m-%dT%H:%M:%S%z')" "$*"; }
|
||||
|
||||
# ---- pure functions (no side effects; unit-tested) ----
|
||||
|
||||
# fc_curve <mode> <temp> -> fan percent
|
||||
# fc_curve <mode> <temp> -> fan percent (continuous linear interpolation between
|
||||
# the per-mode (T_LO,P_LO)..(T_HI,P_HI) anchors; clamped flat outside the range).
|
||||
fc_curve() {
  # Map a temperature to a fan percent by linear interpolation between the
  # per-mode anchors (T_LO,P_LO)..(T_HI,P_HI); flat outside that range.
  #   $1 mode  "quiet" selects the QUIET_* anchors; any other value selects COOL_*.
  #   $2 temp  integer temperature, in the same unit as the *_T_LO/*_T_HI anchors.
  # Outputs: the fan percent on stdout.
  # Returns: 0 on success; 1 (nothing on stdout, message on stderr) if temp is
  #          not an integer.
  local mode="$1" temp="$2" tlo plo thi phi

  # Shell arithmetic is integer-only: reject anything else up front so a bad
  # sensor reading yields one clear diagnostic instead of an arithmetic error.
  if [[ ! "$temp" =~ ^-?[0-9]+$ ]]; then
    printf 'fc_curve: non-integer temp: %s\n' "$temp" >&2
    return 1
  fi

  if [[ "$mode" == "quiet" ]]; then
    tlo=$QUIET_T_LO; plo=$QUIET_P_LO; thi=$QUIET_T_HI; phi=$QUIET_P_HI
  else
    tlo=$COOL_T_LO; plo=$COOL_P_LO; thi=$COOL_T_HI; phi=$COOL_P_HI
  fi

  # Clamp outside the anchors. These two guards also mean the division below
  # is only reached when tlo < temp < thi, so (thi - tlo) is never zero there.
  if (( temp <= tlo )); then echo "$plo"; return 0; fi
  if (( temp >= thi )); then echo "$phi"; return 0; fi

  # Adding half the divisor before the integer division rounds to nearest.
  echo $(( plo + ( (temp - tlo) * (phi - plo) + (thi - tlo) / 2 ) / (thi - tlo) ))
}
|
||||
|
||||
# fc_decide <mode> <temp> <current_pct> <deadband> -> fan percent
|
||||
|
|
@ -230,7 +233,9 @@ main() {
|
|||
local presence="cool"; [[ "$ha_mode" == "auto" ]] && presence="$(get_presence)"
|
||||
local eff; if [[ "$ha_mode" == "manual" ]]; then eff="manual"; elif [[ "$ha_mode" == "auto" ]]; then eff="$presence"; else eff="$ha_mode"; fi
|
||||
local pct; pct="$(fc_resolve "$ha_mode" "$temp" "$manual_pct" "$presence" "$current" "$DEADBAND")"
|
||||
if (( pct != current )); then
|
||||
# Only write when first-run or the change clears MIN_STEP (kills 1-2% jitter
|
||||
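# on the continuous curve; fc_decide already gives asymmetric hysteresis).
# NOTE(review): this gate also skips a final move of less than MIN_STEP onto a
# clamp value (e.g. 98% -> 100% when temp reaches the ceiling) — confirm that
# holding just short of 100% there is acceptable.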
|
||||
if (( current < 0 || pct - current >= MIN_STEP || current - pct >= MIN_STEP )); then
|
||||
if set_manual "$pct"; then log "temp=${temp}C ha_mode=${ha_mode} eff=${eff} fan=${pct}% (was ${current}%)"; current="$pct"
|
||||
else log "WARN set_manual ${pct}% failed"; fi
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env bash
|
||||
# Unit tests for the pure functions in fan-control.sh.
|
||||
# Sources the script (main is guarded), exercises curve/decide/presence/parse.
|
||||
# Sources the script (main is guarded), exercises curve/decide/resolve/presence/parse.
|
||||
# Run: bash infra/scripts/test-fan-control.sh
|
||||
|
||||
set -uo pipefail
|
||||
|
|
@ -15,35 +15,31 @@ eq() { # <description> <expected> <actual>
|
|||
fi
|
||||
}
|
||||
|
||||
# --- COOL curve (power-tuned 2026-06-05: knee at 60%) ---
|
||||
eq "cool 40 -> 30" 30 "$(fc_curve cool 40)"
|
||||
eq "cool 54 -> 30" 30 "$(fc_curve cool 54)"
|
||||
eq "cool 55 -> 50" 50 "$(fc_curve cool 55)"
|
||||
eq "cool 63 -> 50" 50 "$(fc_curve cool 63)"
|
||||
eq "cool 64 -> 60" 60 "$(fc_curve cool 64)"
|
||||
eq "cool 72 -> 60" 60 "$(fc_curve cool 72)"
|
||||
eq "cool 73 -> 80" 80 "$(fc_curve cool 73)"
|
||||
eq "cool 78 -> 80" 80 "$(fc_curve cool 78)"
|
||||
eq "cool 79 -> 100" 100 "$(fc_curve cool 79)"
|
||||
eq "cool 91 -> 100" 100 "$(fc_curve cool 91)"
|
||||
# --- COOL curve (continuous linear: 30% @50C .. 100% @83C) ---
|
||||
eq "cool <=T_LO clamps" 30 "$(fc_curve cool 40)"
|
||||
eq "cool 50 -> 30" 30 "$(fc_curve cool 50)"
|
||||
eq "cool 55 -> 41" 41 "$(fc_curve cool 55)"
|
||||
eq "cool 60 -> 51" 51 "$(fc_curve cool 60)"
|
||||
eq "cool 64 -> 60" 60 "$(fc_curve cool 64)"
|
||||
eq "cool 70 -> 72" 72 "$(fc_curve cool 70)"
|
||||
eq "cool 75 -> 83" 83 "$(fc_curve cool 75)"
|
||||
eq "cool 83 -> 100" 100 "$(fc_curve cool 83)"
|
||||
eq "cool >=T_HI clamps" 100 "$(fc_curve cool 90)"
|
||||
|
||||
# --- QUIET curve ---
|
||||
eq "quiet 50 -> 20" 20 "$(fc_curve quiet 50)"
|
||||
eq "quiet 72 -> 20" 20 "$(fc_curve quiet 72)"
|
||||
eq "quiet 73 -> 40" 40 "$(fc_curve quiet 73)"
|
||||
eq "quiet 77 -> 40" 40 "$(fc_curve quiet 77)"
|
||||
eq "quiet 78 -> 65" 65 "$(fc_curve quiet 78)"
|
||||
eq "quiet 81 -> 65" 65 "$(fc_curve quiet 81)"
|
||||
eq "quiet 82 -> 100" 100 "$(fc_curve quiet 82)"
|
||||
# --- QUIET curve (continuous linear: 20% @68C .. 100% @83C) ---
|
||||
eq "quiet <=T_LO clamps" 20 "$(fc_curve quiet 60)"
|
||||
eq "quiet 68 -> 20" 20 "$(fc_curve quiet 68)"
|
||||
eq "quiet 70 -> 31" 31 "$(fc_curve quiet 70)"
|
||||
eq "quiet 75 -> 57" 57 "$(fc_curve quiet 75)"
|
||||
eq "quiet 80 -> 84" 84 "$(fc_curve quiet 80)"
|
||||
eq "quiet 83 -> 100" 100 "$(fc_curve quiet 83)"
|
||||
|
||||
# --- decide: hysteresis ---
|
||||
eq "decide uninit -> target" 60 "$(fc_decide cool 68 -1 3)"
|
||||
eq "decide ramp up now" 60 "$(fc_decide cool 68 25 3)"
|
||||
eq "decide equal holds" 60 "$(fc_decide cool 64 60 3)"
|
||||
eq "decide down held in band" 80 "$(fc_decide cool 70 80 3)" # 70+3=73 still 80% -> hold
|
||||
eq "decide down past band" 60 "$(fc_decide cool 69 80 3)" # 69+3=72 -> 60% < 80 -> drop
|
||||
eq "decide 100 holds" 100 "$(fc_decide cool 77 100 3)" # 77+3=80 -> 100 -> hold
|
||||
eq "decide 100 drops" 80 "$(fc_decide cool 75 100 3)" # 75+3=78 -> 80 < 100 -> drop
|
||||
# --- decide: asymmetric hysteresis (ramp up now, ease down only past the deadband) ---
|
||||
eq "decide uninit -> target" 68 "$(fc_decide cool 68 -1 3)"
|
||||
eq "decide ramp up now" 68 "$(fc_decide cool 68 25 3)"
|
||||
eq "decide equal holds" 62 "$(fc_decide cool 65 62 3)"
|
||||
eq "decide down held" 72 "$(fc_decide cool 68 72 3)" # curve(68)=68<72 but curve(71)=75 !<72 -> hold
|
||||
eq "decide down past" 60 "$(fc_decide cool 64 72 3)" # curve(64)=60, curve(67)=66<72 -> drop
|
||||
|
||||
# --- fc_clamp / fc_resolve: HA mode resolution ---
|
||||
eq "clamp over 100" 100 "$(fc_clamp 150)"
|
||||
|
|
@ -51,11 +47,10 @@ eq "clamp under 0" 0 "$(fc_clamp -5)"
|
|||
eq "clamp passthrough" 45 "$(fc_clamp 45)"
|
||||
eq "resolve manual=slider" 42 "$(fc_resolve manual 64 42 cool -1 3)"
|
||||
eq "resolve manual clamped" 100 "$(fc_resolve manual 64 150 cool -1 3)"
|
||||
eq "resolve cool=cool curve" 60 "$(fc_resolve cool 64 0 cool -1 3)"
|
||||
eq "resolve quiet=quiet curve" 65 "$(fc_resolve quiet 80 0 cool -1 3)"
|
||||
eq "resolve auto+empty=cool" 60 "$(fc_resolve auto 64 0 cool -1 3)"
|
||||
eq "resolve auto+present=quiet" 20 "$(fc_resolve auto 64 0 quiet -1 3)"
|
||||
eq "resolve cool hysteresis" 60 "$(fc_resolve cool 69 0 cool 80 3)"
|
||||
eq "resolve cool=cool curve" 51 "$(fc_resolve cool 60 0 cool -1 3)"
|
||||
eq "resolve quiet=quiet curve" 73 "$(fc_resolve quiet 78 0 cool -1 3)"
|
||||
eq "resolve auto+empty=cool" 51 "$(fc_resolve auto 60 0 cool -1 3)"
|
||||
eq "resolve auto+present=quiet" 31 "$(fc_resolve auto 70 0 quiet -1 3)"
|
||||
|
||||
# --- presence ---
|
||||
now=1000000
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue