Skip to content

Instantly share code, notes, and snippets.

@ddk50
Created September 12, 2025 08:17
Show Gist options
  • Select an option

  • Save ddk50/8a1420f0ef7c309c47b7450219291756 to your computer and use it in GitHub Desktop.

Select an option

Save ddk50/8a1420f0ef7c309c47b7450219291756 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# smart_audit.sh - SMART check of all drives & overall verdict (OK/WARN/ALERT)
# deps: smartmontools, (optional) mailutils
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
set -euo pipefail
# Exported so that child processes (sort, awk, ...) run in the C locale too;
# a plain assignment only reaches them if LC_ALL was already exported.
export LC_ALL=C
# =========================
# Config
# =========================
MAIL_TO="${MAIL_TO:-}" # recipient from the environment; empty means stdout only
AGE_HOURS=26280 # 3 years = 26280h; WARN at or above this
TEMP_WARN=45 # degrees C, warning
TEMP_CRIT=50 # degrees C, critical
CRC_WARN=1 # WARN when UDMA_CRC_Error_Count >= 1
# Fall back to plain `hostname`, then to a fixed label, so that a failing
# `hostname -s` does not abort the whole audit under `set -e`.
HOST="$(hostname -s 2>/dev/null || hostname 2>/dev/null || printf 'unknown')"
NOW="$(date '+%F %T %Z')"
ALERT=0
WARN=0
OUT="$(mktemp)"
# Remove the report file on every exit path, including aborts under `set -e`.
trap 'rm -f -- "$OUT"' EXIT
# =========================
# Helpers
# =========================
# Format a number of hours as "<years>y<days>d<hours>h".
# A year is counted as 8760 hours (365 days); no trailing newline is printed.
# Arguments: $1 - hours (integer)
ydh() {
  local total=$1
  local years=$(( total / 8760 ))
  local days=$(( total % 8760 / 24 ))
  local rest=$(( total % 24 ))
  printf "%dy%dd%dh" "$years" "$days" "$rest"
}
# Print the first by-id symlink that points at the given block device.
# Arguments: $1 - device path or name (only its basename is compared)
#            $2 - directory holding the symlinks (default: /dev/disk/by-id)
# Outputs:   "<dir>/<link name>" on stdout, or nothing when no link matches
# Returns:   always 0, so callers running under `set -e` are not aborted
#
# Each link is resolved and the basename of its target is compared with the
# device name. Comparing the unresolved target text (e.g. "../../sda") with
# the bare name would never match.
byid_for() {
  local want="${1##*/}" dir="${2:-/dev/disk/by-id}" link target
  for link in "$dir"/*; do
    # Also skips the literal pattern left behind when the glob matches nothing.
    [[ -L "$link" ]] || continue
    target="$(readlink -f -- "$link" 2>/dev/null)" || continue
    if [[ "${target##*/}" == "$want" ]]; then
      printf '%s\n' "$link"
      return 0
    fi
  done
  return 0
}
# Print the report title (host name and timestamp), a blank line, the column
# heading row and a dashed separator line.
# Globals: HOST (read), NOW (read)
print_header() {
  local -a cols=(
    "Stat" "Type" "MODEL" "SERIAL" "DEVICE" "TBW" "HOURS" "(~y d h)"
    "TEMP" "ReAl" "Pend" "Unco" "CRC" "Wear%" "Notes"
  )
  printf '%s\n\n' "SMART Audit Report - ${HOST} (${NOW})"
  printf "%-5s %-6s %-24s %-18s %-22s %8s %8s %-11s %6s %6s %6s %6s %6s %6s %s\n" "${cols[@]}"
  printf '%s\n' "---------------------------------------------------------------------------------------------------------------------------------------------------------------"
}
# Print the closing section of the report: a "Summary:" block with one line
# per raised flag (or an all-OK line when neither flag is set), followed by
# the fixed "Policy:" notes.
# Globals: ALERT (read), WARN (read), TEMP_WARN (read), TEMP_CRIT (read)
print_summary() {
  printf '\n%s\n' "Summary:"
  if (( ALERT )); then
    printf '%s\n' "- ALERT: あり(至急対応推奨)"
  fi
  if (( WARN )); then
    printf '%s\n' "- WARN : あり(計画的に対処)"
  fi
  if (( ALERT == 0 && WARN == 0 )); then
    printf '%s\n' "- すべてOK"
  fi
  printf '%s\n' \
    "" \
    "Policy:" \
    "- 3年(26280h)超えたHDDは予防交換候補。" \
    "- Reallocated/Pending/Uncorrectable > 0 は即バックアップ&交換検討。" \
    "- 温度Warn≥${TEMP_WARN}℃ / Crit≥${TEMP_CRIT}℃。CRC>0はケーブル/ポート疑い。"
}
# =========================
# Assessors
# =========================
# Print the raw value (10th column) of the first attribute row in `smartctl -A`
# output whose attribute name matches a regex, reduced to its leading decimal
# digits. Prints 0 when the attribute is absent or its raw value is not numeric.
# Arguments: $1 - `smartctl -A` output, $2 - regex matched against the name column
_ata_attr_int() {
  local digits
  digits="$(awk -v pat="$2" '
    $1 ~ /^[0-9]+$/ && $2 ~ pat { v = $10; sub(/[^0-9].*$/, "", v); print v; exit }
  ' <<<"$1")"
  # 10# forces base 10 so a zero-padded value is not read as octal.
  printf '%s' "$(( 10#${digits:-0} ))"
}

# Assess one ATA/SATA disk and print a single report row.
# Arguments: $1 - device path (a /dev/disk/by-id path is recommended); only its
#                 basename is shown in the DEVICE column
# Globals:   TEMP_WARN, TEMP_CRIT, CRC_WARN, AGE_HOURS (read); ALERT, WARN (set to 1)
# Outputs:   one formatted row on stdout
#
# smartctl failures are deliberately tolerated (best effort): missing values
# fall back to 0, or "-" for the temperature.
# Raw values are reduced to their leading digits because a raw column such as
# "1234h+56m+07.890s" would otherwise break the arithmetic tests and `printf %d`.
# WARN is raised for every warning-level finding, also when the row itself is
# already ALERT, so the result does not depend on the order of the checks.
assess_ata() {
  local dev="$1" info data model serial poh temp realloc pend unco crc notes status
  local warn_hit=0
  info="$(smartctl -i "$dev" 2>/dev/null || true)"
  data="$(smartctl -A "$dev" 2>/dev/null || true)"
  # The last line containing "Model" wins.
  model="$(awk -F: '/Device Model|Model Number|Model/{gsub(/^[ \t]+/,"",$2); m=$2} END{print m}' <<<"$info")"
  serial="$(awk -F: '/Serial Number/{gsub(/^[ \t]+/,"",$2); print $2; exit}' <<<"$info")"
  poh="$(_ata_attr_int "$data" 'Power_On_Hours')"
  realloc="$(_ata_attr_int "$data" 'Reallocated_Sector_Ct')"
  pend="$(_ata_attr_int "$data" 'Current_Pending_Sector')"
  unco="$(_ata_attr_int "$data" 'Offline_Uncorrectable')"
  crc="$(_ata_attr_int "$data" 'UDMA_CRC_Error_Count')"
  # Temperature: the last attribute row whose name contains "Temperature" and
  # whose raw value starts with a digit; "-" when there is none.
  temp="$(awk '
    $1 ~ /^[0-9]+$/ && $2 ~ /Temperature/ && $10 ~ /^[0-9]+/ { t = $10; sub(/[^0-9].*$/, "", t) }
    END { print (t == "" ? "-" : t) }
  ' <<<"$data")"
  if [[ "$temp" != "-" ]]; then temp=$(( 10#$temp )); fi

  notes=""; status="OK"
  # Temperature
  if [[ "$temp" != "-" ]]; then
    if (( temp >= TEMP_CRIT )); then
      notes+="[TEMP-CRIT]"; status="ALERT"; ALERT=1
    elif (( temp >= TEMP_WARN )); then
      notes+="[TEMP-WARN]"; warn_hit=1
    fi
  fi
  # Defect counters
  if (( pend > 0 )); then notes+="[PENDING]"; fi
  if (( unco > 0 )); then notes+="[UNCO]"; fi
  if (( pend > 0 || unco > 0 )); then status="ALERT"; ALERT=1; fi
  if (( realloc > 0 )); then notes+="[REALLOC]"; warn_hit=1; fi
  if (( crc >= CRC_WARN )); then notes+="[CRC]"; warn_hit=1; fi
  if (( poh >= AGE_HOURS )); then notes+="[AGE≥3y]"; warn_hit=1; fi
  if (( warn_hit )); then
    WARN=1
    if [[ "$status" == "OK" ]]; then status="WARN"; fi
  fi
  if [[ -z "$notes" ]]; then notes="-"; fi

  printf "%-5s %-6s %-24s %-18s %-22s %8s %8d %-11s %6s %6d %6d %6d %6d %6s %s\n" \
    "$status" "HDD" "${model:0:24}" "${serial:0:18}" "$(basename "$dev")" \
    "-" "$poh" "$(ydh "$poh")" "$temp" "$realloc" "$pend" "$unco" "$crc" "-" "$notes"
}
# Print the value of the first "Label: value" line in `smartctl -A` output whose
# label matches a regex, as a plain decimal integer. Any bracketed suffix
# (e.g. "[1.00 TB]") and all non-digit characters such as thousands separators
# are dropped. Prints 0 when the field is absent.
# Arguments: $1 - `smartctl -A` output, $2 - regex matched against the label
_nvme_field_int() {
  local digits
  digits="$(awk -F: -v pat="$2" '
    $1 ~ pat { v = $2; sub(/\[.*/, "", v); gsub(/[^0-9]/, "", v); print v; exit }
  ' <<<"$1")"
  # 10# forces base 10 so a zero-padded value is not read as octal.
  printf '%s' "$(( 10#${digits:-0} ))"
}

# Assess one NVMe device and print a single report row.
# Arguments: $1 - controller path (/dev/nvmeX) or namespace path (/dev/nvmeXnY)
# Globals:   ALERT, WARN (set to 1)
#            NVME_TEMP_WARN / NVME_TEMP_CRIT / NVME_WEAR_WARN (optional overrides;
#            defaults 70 / 80 / 90)
# Outputs:   one formatted row on stdout
#
# smartctl failures are deliberately tolerated (best effort); missing values
# fall back to 0.
assess_nvme() {
  local dev="$1" info data model serial temp_c poh pct_used media_err duw_units tbw notes status wear_str
  local disp="${dev##*/}"
  local temp_warn="${NVME_TEMP_WARN:-70}" temp_crit="${NVME_TEMP_CRIT:-80}" wear_warn="${NVME_WEAR_WARN:-90}"
  # A controller path gets "n1" appended for display; a path that already names
  # a namespace is shown as is (otherwise it would read e.g. "nvme0n1n1").
  [[ "$disp" =~ ^nvme[0-9]+n[0-9]+ ]] || disp+="n1"

  info="$(smartctl -i -H "$dev" 2>/dev/null || true)"
  data="$(smartctl -A "$dev" 2>/dev/null || true)"
  model="$(awk -F: '/Model Number/{gsub(/^[ \t]+/,"",$2); print $2; exit}' <<<"$info")"
  serial="$(awk -F: '/Serial Number/{gsub(/^[ \t]+/,"",$2); print $2; exit}' <<<"$info")"
  temp_c="$(_nvme_field_int "$data" '^[[:space:]]*Temperature$')"
  poh="$(_nvme_field_int "$data" 'Power On Hours')"
  pct_used="$(_nvme_field_int "$data" 'Percentage Used')"
  media_err="$(_nvme_field_int "$data" 'Media and Data Integrity Errors')"
  # Data Units Written -> decimal TB (1 unit = 512,000 bytes = 0.000000512 TB)
  duw_units="$(_nvme_field_int "$data" 'Data Units Written')"
  tbw="$(awk -v n="${duw_units}" 'BEGIN{ printf("%.1fTB", n * 0.000000512) }')"

  notes=""; status="OK"
  if (( media_err > 0 )); then status="ALERT"; ALERT=1; notes+="[MEDIA-ERR]"; fi
  if (( temp_c >= temp_crit )); then
    status="ALERT"; ALERT=1; notes+="[TEMP-CRIT]"
  elif (( temp_c >= temp_warn )); then
    if [[ "$status" == "OK" ]]; then status="WARN"; fi
    WARN=1; notes+="[TEMP-WARN]"
  fi
  if (( pct_used >= wear_warn )); then
    if [[ "$status" == "OK" ]]; then status="WARN"; fi
    WARN=1; notes+="[WEAR≥${wear_warn}%]"
  fi
  if [[ -z "$notes" ]]; then notes="-"; fi
  wear_str="${pct_used}%"

  printf "%-5s %-6s %-24s %-18s %-22s %8s %8d %-11s %6s %6s %6s %6s %6s %6s %s\n" \
    "$status" "NVMe" "${model:0:24}" "${serial:0:18}" "$disp" \
    "${tbw}" "$poh" "$(ydh "$poh")" "$temp_c" "-" "-" "-" "-" "$wear_str" "$notes"
}
# =========================
# Discovery
# =========================
# Fill the global array DISKS with "/dev/<name>" for every entry that lsblk
# reports with type "disk", leaving out names that start with one of the
# excluded prefixes (zd, loop, ram, sr, dm-, md, nbd, zram).
# Globals: DISKS (written)
discover_disks() {
  local skip='^(zd|loop|ram|sr|dm-|md|nbd|zram)'
  mapfile -t DISKS < <(
    lsblk -ndo NAME,TYPE |
      awk -v skip="$skip" '$2 == "disk" && $1 !~ skip { print "/dev/" $1 }'
  )
}
# =========================
# Main
# =========================
# Build the report (header, one row per disk sorted by status then model,
# summary) and deliver it by mail when MAIL_TO is set and `mail` is available,
# otherwise print it to stdout.
# Globals: OUT (report file; written, then removed), DISKS, HOST, MAIL_TO (read),
#          ALERT, WARN (set by the assessors, read here)
# Returns: 2 when ALERT is set, 1 when only WARN is set, 0 otherwise
main() {
  local dev disp rows subj
  discover_disks
  # The loop is redirected to a file rather than piped into sort: a pipeline
  # stage runs in a subshell, which would discard the ALERT/WARN flags that the
  # assessors set, leaving the summary, mail subject and exit status at "OK".
  # ${DISKS[@]+...} keeps an empty array from tripping `set -u` on older bash.
  for dev in ${DISKS[@]+"${DISKS[@]}"}; do
    if [[ "$dev" =~ ^/dev/nvme ]]; then
      assess_nvme "$dev"
    else
      # A failing by-id lookup must not abort the audit; fall back to $dev.
      disp="$(byid_for "$dev" || true)"
      [[ -n "$disp" ]] || disp="$dev"
      assess_ata "$disp"
    fi
  done > "$OUT"
  # Sort in the C locale regardless of what the environment exports.
  rows="$(LC_ALL=C sort -k1,1 -k3,3 "$OUT")"
  {
    print_header
    if [[ -n "$rows" ]]; then printf '%s\n' "$rows"; fi
    print_summary
  } > "$OUT"

  subj="${HOST} SMART audit"
  if (( ALERT )); then
    subj="[ALERT] ${subj}"
  elif (( WARN )); then
    subj="[WARN] ${subj}"
  fi
  if [[ -n "${MAIL_TO}" ]] && command -v mail >/dev/null 2>&1; then
    mail -s "$subj" "$MAIL_TO" < "$OUT"
  else
    cat "$OUT"
  fi
  rm -f "$OUT"
  if (( ALERT )); then return 2; fi
  if (( WARN )); then return 1; fi
  return 0
}
# Entry point: forward the command-line arguments to main, which returns
# 0 (all OK), 1 (WARN) or 2 (ALERT).
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment