Skip to content

Instantly share code, notes, and snippets.

@dhensen
Created January 16, 2026 00:38
Show Gist options
  • Select an option

  • Save dhensen/3fa18cf7beb03c758d4b024663fd4c29 to your computer and use it in GitHub Desktop.

Select an option

Save dhensen/3fa18cf7beb03c758d4b024663fd4c29 to your computer and use it in GitHub Desktop.
Brute-force script that fights the motherboard EC resetting my fan curve, in the quest to keep my MI50 from overheating.
# write this to /etc/systemd/system/mi50-it8792-pwm3.service, then
# sudo systemctl daemon-reload
# sudo systemctl enable --now mi50-it8792-pwm3.service
[Unit]
Description=MI50 HBM temp → IT8792 pwm3 fan control (hammer)
After=multi-user.target
# 0 turns off start rate limiting, so the Restart=always below is never
# throttled into a permanent "failed" state.
StartLimitIntervalSec=0
[Service]
# The script stays in the foreground and is the service's main process.
Type=simple
ExecStart=/usr/local/bin/mi50-memtemp-to-it8792-pwm3.sh
# Bring the control loop back 0.5 s after any exit, clean or not.
Restart=always
RestartSec=0.5
# Raised CPU priority and the highest best-effort I/O priority for the loop.
Nice=-10
IOSchedulingClass=best-effort
IOSchedulingPriority=0
# optional hardening (safe with sysfs writes)
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=full
ProtectHome=true
[Install]
# Start the service as part of the normal multi-user boot.
WantedBy=multi-user.target
#!/usr/bin/env bash
# create /usr/local/bin/mi50-memtemp-to-it8792-pwm3.sh
# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Control IT8792 pwm3 (SYS_FAN4) from MI50 HBM temp (amdgpu mem):
# TEMP: /sys/class/hwmon/hwmon2/temp3_input (millidegC)
# PWM : /sys/class/hwmon/hwmon4/pwm3 (0..255)
#
# Because the IT8792/firmware fights pwm writes, we "hammer" at a tight interval.
#
# Usage:
# sudo mi50-memtemp-to-it8792-pwm3.sh
# sudo mi50-memtemp-to-it8792-pwm3.sh --dry-run
#
# Tuning via env:
# INTERVAL=0.01
# TEMP_IN=/sys/class/hwmon/hwmon2/temp3_input
# PWM_OUT=/sys/class/hwmon/hwmon4/pwm3
# PWM_EN=/sys/class/hwmon/hwmon4/pwm3_enable
# PWM_AUTO_START=/sys/class/hwmon/hwmon4/pwm3_auto_start
#
# Curve (degC -> pwm):
# T_MIN=45 PWM_MIN=90
# T_MAX=100 PWM_MAX=255
# Optional "kick" to ensure spin-up:
# KICK_PWM=180 KICK_SECS=0.4
#
# Stop: Ctrl+C
# Command line: the only recognised argument is a leading --dry-run, which
# sets DRY_RUN=1; anything else (or nothing) leaves it at 0.
case "${1:-}" in
  --dry-run) DRY_RUN=1 ;;
  *)         DRY_RUN=0 ;;
esac

# Settings: each one keeps a non-empty value inherited from the environment
# and otherwise falls back to the default given here.
: "${INTERVAL:=0.01}"
: "${TEMP_IN:=/sys/class/hwmon/hwmon2/temp3_input}"
: "${PWM_OUT:=/sys/class/hwmon/hwmon4/pwm3}"
: "${PWM_EN:=/sys/class/hwmon/hwmon4/pwm3_enable}"
: "${PWM_AUTO_START:=/sys/class/hwmon/hwmon4/pwm3_auto_start}"
: "${T_MIN:=45}"
: "${T_MAX:=100}"
: "${PWM_MIN:=90}"
: "${PWM_MAX:=255}"
: "${KICK_PWM:=180}"
: "${KICK_SECS:=0.4}"
# die MESSAGE...
# Print "ERROR: " followed by the arguments on stderr, then terminate the
# script with exit status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
# --- Start-up validation ---------------------------------------------------
# Every failed check aborts through die, before the first sysfs write.

# require_uint NAME
# Insist that the variable called NAME holds only decimal digits, then store
# it back in canonical base-10 form. Without the normalisation a value such as
# 08 passes the digit test but is later rejected by bash arithmetic as an
# invalid octal constant.
require_uint() {
  local name="$1"
  if [[ "${!name}" =~ ^[0-9]+$ ]]; then
    printf -v "$name" '%d' "$(( 10#${!name} ))"
  else
    die "$name must be integer"
  fi
}

# is_decimal VALUE
# Succeed only for an unsigned decimal number (5, 0.4, .25, 1e-2). awk alone is
# not a sufficient check: it compares non-numeric text as a string, so a value
# like "abc" would satisfy s>0 and only fail later inside sleep.
is_decimal() {
  local re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][-+]?[0-9]+)?$'
  [[ "$1" =~ $re ]]
}

# The sensor must be readable and all three control files writable.
[[ -r "$TEMP_IN" ]] || die "missing/unreadable TEMP_IN: $TEMP_IN"
[[ -w "$PWM_OUT" ]] || die "missing/unwritable PWM_OUT: $PWM_OUT"
[[ -w "$PWM_EN" ]] || die "missing/unwritable PWM_EN: $PWM_EN"
[[ -w "$PWM_AUTO_START" ]] || die "missing/unwritable PWM_AUTO_START: $PWM_AUTO_START"

# Curve parameters are whole numbers.
for v in T_MIN T_MAX PWM_MIN PWM_MAX KICK_PWM; do
  require_uint "$v"
done

# T_MAX > T_MIN also guarantees a non-zero divisor for the interpolation.
(( T_MAX > T_MIN )) || die "T_MAX must be > T_MIN"
(( PWM_MAX >= PWM_MIN )) || die "PWM_MAX must be >= PWM_MIN"
(( PWM_MIN >= 0 && PWM_MAX <= 255 )) || die "PWM range must be within 0..255"
(( KICK_PWM >= 0 && KICK_PWM <= 255 )) || die "KICK_PWM must be 0..255"

# Durations handed to sleep: INTERVAL strictly positive, KICK_SECS may be 0.
if ! is_decimal "$INTERVAL" || ! awk -v s="$INTERVAL" 'BEGIN{exit !(s>0)}'; then
  die "INTERVAL must be > 0 seconds"
fi
# is_decimal accepts no sign, so a match already implies KICK_SECS >= 0.
if ! is_decimal "$KICK_SECS"; then
  die "KICK_SECS must be >= 0"
fi
# write_file PATH VALUE
# Store VALUE (plus a newline) in PATH. When DRY_RUN is non-zero nothing is
# written; the equivalent shell command is printed on stdout instead.
# Returns the status of the write (or of the print in dry-run mode).
write_file() {
  local dest="$1" data="$2"
  if (( ! DRY_RUN )); then
    echo "$data" > "$dest"
    return
  fi
  echo "DRY: echo $data > $dest"
}
# Before the loop starts: set the fallback duty (pwm3_auto_start) to its
# maximum, then switch the channel to manual mode. Both writes are
# best-effort, so a failure of either one is deliberately ignored.
write_file "$PWM_AUTO_START" 255 || :
write_file "$PWM_EN" 1 || :
cleanup() {
# Keep fallback max on exit so it won't drop to 24.
( echo 255 > "$PWM_AUTO_START" ) 2>/dev/null || true
}
trap cleanup EXIT INT TERM
last_pwm=-1
# calc_pwm TEMP_C
# Map an integer temperature in degC to a PWM duty and print it on stdout.
# At or below T_MIN the result is PWM_MIN, at or above T_MAX it is PWM_MAX,
# and in between it follows the straight line joining those two points
# (integer division, so the result is truncated).
calc_pwm() {
  local temp_c="$1"
  local span rise
  if (( temp_c <= T_MIN )); then
    echo "$PWM_MIN"
  elif (( temp_c >= T_MAX )); then
    echo "$PWM_MAX"
  else
    span=$(( T_MAX - T_MIN ))
    rise=$(( (temp_c - T_MIN) * (PWM_MAX - PWM_MIN) ))
    echo $(( PWM_MIN + rise / span ))
  fi
}
# kick_if_needed TARGET
# Spin-up helper: when TARGET is below KICK_PWM and KICK_SECS is greater than
# zero, write KICK_PWM to PWM_OUT and hold it for KICK_SECS seconds. The
# caller is expected to write the real target afterwards.
kick_if_needed() {
  local want="$1"
  # Targets at or above the kick level get no kick.
  (( want < KICK_PWM )) || return 0
  # A KICK_SECS that is not greater than zero switches the kick off.
  awk -v s="$KICK_SECS" 'BEGIN{exit !(s>0)}' || return 0
  write_file "$PWM_OUT" "$KICK_PWM"
  sleep "$KICK_SECS"
}
# --- Main --------------------------------------------------------------------
# Print the effective configuration once, then loop forever: read the sensor,
# map the temperature to a duty, and rewrite the control files on every pass.
echo "MI50 mem temp -> IT8792 pwm3"
echo "TEMP_IN=$TEMP_IN PWM_OUT=$PWM_OUT"
echo "Curve: ${T_MIN}C=>${PWM_MIN} .. ${T_MAX}C=>${PWM_MAX}, interval=${INTERVAL}s"
if (( DRY_RUN )); then
  echo "DRY RUN enabled"
fi

# 1 while the sensor yields a usable value; used to log an outage only once.
sensor_ok=1
while true; do
  # temp is millidegC
  if raw="$(cat "$TEMP_IN" 2>/dev/null)" && [[ "$raw" =~ ^-?[0-9]+$ ]]; then
    # convert to degC (integer)
    t=$(( raw / 1000 ))
    pwm="$(calc_pwm "$t")"
    sensor_ok=1
  else
    # Fail safe: with no trustworthy reading, run the fan at PWM_MAX. Treating
    # the failure as 0 degC would select PWM_MIN exactly when the real
    # temperature is unknown.
    if (( sensor_ok )); then
      echo "WARNING: cannot read a temperature from $TEMP_IN; forcing pwm=$PWM_MAX" >&2
    fi
    sensor_ok=0
    t="?"
    pwm="$PWM_MAX"
  fi
  # keep manual + fallback safe (cheap writes; you WANT this on this board)
  write_file "$PWM_AUTO_START" 255 || true
  write_file "$PWM_EN" 1 || true
  if (( pwm != last_pwm )); then
    # optional kick to avoid stalls at low duty; note that it fires on every
    # change to a duty below KICK_PWM (set KICK_SECS=0 to disable it)
    kick_if_needed "$pwm"
    write_file "$PWM_OUT" "$pwm"
    last_pwm="$pwm"
    # Log only on change so the tight interval does not flood the output.
    echo "$(date +'%H:%M:%S.%3N') t=${t}C pwm=${pwm}"
  else
    # still hammer pwm to beat the IT8792 fight
    write_file "$PWM_OUT" "$pwm"
  fi
  sleep "$INTERVAL"
done
@dhensen
Copy link
Author

dhensen commented Jan 16, 2026

The script is probably better off without the call to kick_if_needed on l139.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment