Java程序CPU异常监控bash脚本
契机
思路
# List running JVMs with jps, keep lines containing "java", drop the jps tool
# itself, and print the first column (the PID).
jps -lv | grep java | grep -v Jps | awk '{print $1}'
# Take one batch-mode top sample for $pid and print field 9 of the matching
# line (%CPU in top's default column layout).
top -b -n 1 -p $pid | grep $pid | awk '{print $9}'
# Write a thread dump of $pid to a file ("/xxx.txt" is a placeholder path).
jstack $pid > "/xxx.txt"
# Start Arthas against the process selected by jar name and feed it the
# commands in a batch file via -f (".xx.sh" is a placeholder name).
java -jar arthas-boot.jar --select ./$jar_name -f .xx.sh
# Arthas console command (not a shell command): show the dashboard 5 times
# at a 3000 ms interval.
dashboard -n 5 -i 3000
# Write a binary heap dump of $pid to /mnt/$pid.hprof.
jmap -dump:format=b,file=/mnt/$pid.hprof $pid
成品
运行命令
# Start the monitor in the background, immune to hangups (nohup); stdout and
# stderr are both appended to ./monitor.log.
nohup ./monitor.sh >> ./monitor.log 2>&1 &
主脚本:monitor.sh
#!/bin/bash
#
# monitor.sh - poll every running JVM once a minute and capture diagnostics
# when a process's CPU usage (as reported by top) crosses a threshold.
#
#   CPU > 100 : write a jstack thread dump and run the Arthas batch script
#               ./arthas-profiler.sh against the process.
#   CPU > 300 : additionally write a heap dump with jmap and restart the
#               application through ./deploy.sh (skipped when the jar name
#               contains "xxxx").
#
# Artifacts are written under /mnt. arthas-boot.jar, arthas-profiler.sh and
# deploy.sh are looked up in /root/prod.
#
# Usage: nohup ./monitor.sh >> ./monitor.log 2>&1 &

# Accepts plain decimal numbers such as "7" or "153.3".
readonly NUMBER_RE='^[0-9]+([.][0-9]+)?$'

while true; do
  echo "$(date): 监控java"
  timestamp=$(date +%Y%m%d%H%M%S)

  # A plain `for` over the PID list is intentional: a `while read` loop fed by
  # redirection would let the child tools below consume the list from stdin.
  for pid in $(jps -lv | grep java | grep -v Jps | awk '{print $1}'); do
    # Take ONE top sample so CPU and memory describe the same instant, and
    # match the PID column exactly: a substring grep would also hit other
    # PIDs (123 vs 1234) or numbers in top's summary lines.
    # LC_ALL=C keeps the decimal separator a dot so bc can parse the value.
    read -r cpu mem < <(
      LC_ALL=C top -b -n 1 -p "$pid" \
        | awk -v pid="$pid" '$1 == pid {print $9, $10; exit}'
    )
    jar_name=$(jps | awk -v pid="$pid" '$1 == pid {print $2; exit}')

    # The process may have exited between jps and top; without a numeric CPU
    # value the threshold comparisons below would fail.
    if [[ ! "$cpu" =~ $NUMBER_RE ]]; then
      echo "$(date): PID $pid 无法读取CPU占用(进程可能已退出), 跳过"
      continue
    fi

    echo "$(date): PID $pid - CPU : $cpu - Memory : $mem - 应用: $jar_name"

    if [[ "$(echo "$cpu > 100" | bc)" -eq 1 ]]; then
      echo "$(date): $jar_name , PID $pid cpu占用 $cpu 立即打印jstack arthas火焰图 arthas线程"
      echo "$(date): 生成jstack文件 $jar_name $pid"
      jstack "$pid" > "/mnt/${jar_name}_${pid}_${timestamp}_jstack.txt"
      echo "$(date): 生成arthas文件 $jar_name $pid"
      # arthas-boot.jar and the batch script are addressed relative to
      # /root/prod, so only run Arthas when the cd actually succeeded.
      if cd /root/prod; then
        java -jar arthas-boot.jar --select "./$jar_name" -f ./arthas-profiler.sh
        # The batch script appends to /mnt/thread10.txt; rename it only when
        # Arthas really produced it.
        if [[ -f /mnt/thread10.txt ]]; then
          mv /mnt/thread10.txt "/mnt/${jar_name}_${pid}_${timestamp}_thread10.txt"
        fi
      else
        echo "$(date): 无法进入 /root/prod, 跳过arthas $jar_name $pid"
      fi
    fi

    if [[ "$(echo "$cpu > 300" | bc)" -eq 1 ]] && [[ "$jar_name" != *"xxxx"* ]]; then
      echo "$(date): $jar_name , PID $pid cpu占用 $cpu 立即重启,并且打印dump文件"
      echo "$(date): 生成dump文件 $pid"
      jmap -dump:format=b,file="/mnt/$pid.hprof" "$pid"
      # Never run a relative ./deploy.sh from an unexpected directory.
      if cd /root/prod; then
        ./deploy.sh restart "$jar_name"
      else
        echo "$(date): 无法进入 /root/prod, 跳过重启 $jar_name $pid"
      fi
    fi
  done

  echo "$(date): -----------------------------------------------------------------------------"
  sleep 60
done
副脚本:arthas-profiler.sh
thread -n 10 >> /mnt/thread10.txt
profiler start
dashboard -n 5 -i 3000
profiler stop
stop
总结
- arthas的批处理脚本中无法使用sleep命令,因此在 profiler start 和 profiler stop 之间用 dashboard -n 5 -i 3000(5次 × 3秒,约15秒)来代替等待
- dump文件很难用来分析CPU占用,主要还是依靠火焰图、top 和 dashboard
- 脚本采集的是瞬时CPU占用率,每60秒扫描一次,循环执行
写到最后