安装
# Install the stress-test, sensor, disk-health and build tooling in one apt run.
sudo apt install -y \
  stress-ng lm-sensors \
  smartmontools nvme-cli fio \
  memtester \
  git build-essential
# Probe for hardware-monitoring chips (asks questions interactively),
# then print the current sensor readings.
sudo sensors-detect
sensors
CPU 半个小时
# 30-minute CPU burn using the matrixprod method with result verification;
# --cpu 0 starts one worker per online CPU.
# 2>&1: depending on the stress-ng version, log messages go to stderr rather
# than stdout; merge both so tee really captures them in the log file.
stress-ng --cpu 0 --cpu-method matrixprod --verify --metrics-brief --timeout 30m 2>&1 | tee cpu_stress_30m.log
内存一个小时
# 1-hour memory burn: 4 vm workers, --vm-bytes 75%, all vm methods,
# with result verification.
# 2>&1: depending on the stress-ng version, log messages go to stderr rather
# than stdout; merge both so tee really captures them in the log file.
stress-ng --vm 4 --vm-bytes 75% --vm-method all --verify --metrics-brief --timeout 1h 2>&1 | tee mem_stress_1h.log
GPU 半个小时
# Fetch and build gpu-burn, then run it for 1800 s (30 minutes).
git clone https://github.com/wilicc/gpu-burn.git
cd gpu-burn
# Build BEFORE running. Use the plain build by default:
make
# Alternative build with an explicit compute capability. It must be done
# before the run (rebuilding afterwards has no effect on a test that already
# finished), and needs a clean first so make does not treat the existing
# binary as up to date.
# NOTE(review): presumably needed for GPUs with compute capability 12.0 -
# confirm the right value for the card under test.
#   make clean && make COMPUTE=12.0
# 2>&1 so error output from gpu_burn also lands in the log file.
./gpu_burn 1800 2>&1 | tee gpu_burn_30m.log
跑的时候看GPU
# Run ONE of these in a second terminal while a stress test is active.
# GPU only, refreshed every second:
watch -n 1 nvidia-smi
# Combined view, refreshed every second: timestamp, lm-sensors readings,
# then nvidia-smi. Section banners restored to symmetric "=====" rules.
watch -n 1 'date; echo "===== CPU / BOARD ====="; sensors; echo; echo "===== GPU ====="; nvidia-smi'
跑完看log
# After the runs, scan the kernel log (human-readable timestamps) for lines
# mentioning errors, thermal events, NVIDIA driver messages (Xid/NVRM) or
# machine-check events.
# sudo: reading the kernel log is refused for unprivileged users when
# kernel.dmesg_restrict=1, in which case grep would see no input at all.
sudo dmesg -T | grep -iE "error|fail|thermal|xid|nvrm|mce|hardware"