最近跑模型时参考了一些资料,写了一个脚本,用来查看GPU上正在运行的程序。
nvidia-smi、ps -ef | grep python、ps aux | grep python、htop、top
这些命令单独用起来都不够简洁,用这个脚本一次就能看到全部信息,方便很多。
把脚本保存为check.sh之后,可以使用watch -n 1 -d bash check.sh
运行,每秒刷新一次并高亮变化的部分。
注意:
- 需要先执行
pip install gpustat
安装gpustat
- 需要系统中已经可以使用
nvidia-smi
命令,脚本还会调用vmstat、free、ps和awk
#!/bin/bash
#######################################
# Print the CPU usage section of the report.
# Sums the us (user) and sy (system) columns of vmstat's data row
# (row 3, columns 13 and 14 in the default procps layout).
# NOTE: vmstat is called without an interval, so the figures it reports
# are averages since boot, not an instantaneous reading.
# Globals:   none
# Arguments: none
# Outputs:   a border line followed by the CPU usage line, on stdout
#######################################
cpu() {
  local util
  # One vmstat call is enough; the iowait value was computed but never shown.
  util=$(vmstat | awk 'NR == 3 { print $13 + $14 }')
  printf '+---------------------------------------------------------------+\n'
  printf '%s\n' "| CPU使用率:${util}% |"
}
#######################################
# Print the memory usage section of the report.
# Reads the second line of `free -m` (the Mem: row) once and derives:
#   total = column 2, converted from MiB to GiB
#   used  = column 2 minus the last column, converted from MiB to GiB
# Both values are formatted with one decimal place.
# Globals:   none
# Arguments: none
# Outputs:   a border line followed by the "used/total" line, on stdout
#######################################
memory() {
  local used='' total=''
  # A single snapshot keeps both figures consistent with each other.
  # `|| true`: if free produces no Mem: row, read fails and the fields
  # are simply left empty, matching the previous output in that case.
  read -r used total < <(
    free -m | awk 'NR == 2 { printf "%.1f %.1f\n", ($2 - $NF) / 1024, $2 / 1024 }'
  ) || true
  printf '+---------------------------------------------------------------+\n'
  printf '%s\n' "| 内存使用率:${used}G/${total}G |"
}
#######################################
# Print a table with one row per compute process reported by nvidia-smi.
# For every PID the row shows, taken from a single `ps aux` snapshot:
#   name   - column 1 of ps aux (the owning user)
#   cpu    - column 3 (%CPU), two decimals
#   memory - column 4 (%MEM), two decimals
#   rss    - column 6 (RSS in KiB) divided by 1048576, i.e. in GiB
#   time   - column 10 (TIME)
# A PID that is not present in the ps snapshot still gets a row, with
# the metric fields left empty.
# Globals:   none
# Arguments: none
# Outputs:   the table on stdout
#######################################
details() {
  local -r border='+---------+---------+----------+----------+-----------+---------+'
  local ps_snapshot pid user cpu_pct mem_pct rss_gb cpu_time

  printf '%s\n' "$border"
  # Header widths are 7/7/8/8/9/7 so they line up with the border and with
  # the data rows, whose cpu/memory/rss cells carry a "%" or "GB" suffix.
  printf '| %7s | %7s | %8s | %8s | %9s | %7s |\n' "pid" "name" "cpu" "memory" "rss" "time"
  printf '%s\n' "$border"

  # Take the process list once so every row comes from the same snapshot.
  ps_snapshot=$(ps aux)

  # Ask nvidia-smi for the PIDs directly instead of scraping its
  # human-readable table, whose column positions differ between versions.
  while read -r pid || [[ -n "$pid" ]]; do
    # Ignore blank lines and any non-numeric diagnostic output.
    [[ "$pid" =~ ^[0-9]+$ ]] || continue

    user='' cpu_pct='' mem_pct='' rss_gb='' cpu_time=''
    # `|| true`: no matching process means no awk output; keep fields empty.
    read -r user cpu_pct mem_pct rss_gb cpu_time < <(
      awk -v pid="$pid" '
        $2 == pid {
          printf "%s %.2f %.2f %.2f %s\n", $1, $3, $4, $6 / 1048576, $10
          exit
        }' <<<"$ps_snapshot"
    ) || true

    printf '| %7s | %7s | %7s%% | %7s%% | %7sGB | %7s |\n' \
      "$pid" "$user" "$cpu_pct" "$mem_pct" "$rss_gb" "$cpu_time"
  done < <(nvidia-smi --query-compute-apps=pid --format=csv,noheader)

  printf '%s\n' "$border"
}
# Render the report sections in order: CPU, memory, then the per-process table.
for section in cpu memory details; do
  "$section"
done
# Finish with the gpustat summary; install it first with: pip install gpustat
gpustat