通过Java进程id,对超过指定CPU占用率的线程进行监控,并打印输出堆栈信息,方便问题排查。
#!/usr/bin/env bash
#
# Defaults and command-line parsing for the CPU watcher.
#
# Options:
#   -p <pid>        process id to watch (required by the checks that follow)
#   -j <java home>  JDK home directory; "/bin" is appended to it
#   -t <seconds>    pause between two samples (default 5)
#   -f <file>       log file to append to (default: cpu.log beside the script)
#   -c <percent>    threads at or above this CPU usage are logged (default 0)

JAVA="${JAVA_HOME:-}/bin"
PID=
INTERVAL=5
LOG_PATH="$(cd "$(dirname "$0")" && pwd)/cpu.log"
CPU_MAX=0

#######################################
# Parse the command-line options into the global settings.
# Globals:   PID, JAVA, INTERVAL, LOG_PATH, CPU_MAX (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success; exits with status 1 when an option is missing
#            its argument
#######################################
parseArgs() {
  local opt
  # Local OPTIND so the function can be called more than once.
  local OPTIND=1

  # The option string must list every letter handled below; "f" used to be
  # missing, which made -f silently ignored.
  while getopts ":p:j:t:f:c:" opt; do
    case "${opt}" in
      p)
        # Process id.
        PID=${OPTARG}
        ;;
      j)
        # Java home.
        JAVA="${OPTARG}/bin"
        ;;
      t)
        # Sampling interval in seconds.
        INTERVAL=${OPTARG}
        ;;
      f)
        # Log output location.
        LOG_PATH=${OPTARG}
        ;;
      c)
        # CPU usage at or above this value gets printed.
        CPU_MAX=${OPTARG}
        ;;
      :)
        printf 'option -%s requires an argument\n' "${OPTARG}" >&2
        exit 1
        ;;
      \?)
        # Unknown options were always tolerated; keep going but say so.
        printf 'ignoring unknown option -%s\n' "${OPTARG}" >&2
        ;;
    esac
  done
}

parseArgs "$@"
# ANSI foreground colour codes usable as the second argument of echoInfo:
#   30 black    31 red      32 green       33 yellow
#   34 blue     35 purple   36 dark green  37 white

#######################################
# Print a message, optionally coloured.
# Arguments: $1 - message text (backslash escapes are interpreted)
#            $2 - optional ANSI colour code; when given, the message is
#                 prefixed with "---" and wrapped in that colour
# Outputs:   the message on stdout
#######################################
echoInfo() {
  local message=${1:-}
  local color=${2:-}

  # Quoted test: the unquoted form collapsed to `[ ! -n ]` when no colour
  # was passed, so the plain branch could never be reached.
  if [[ -z "${color}" ]]; then
    echo -e "${message}"
  else
    echo -e "\033[${color}m---${message}\033[0m"
  fi
}
#######################################
# Print a message in red (ANSI colour 31).
# Arguments: $1 - message text (backslash escapes are interpreted)
# Outputs:   the coloured message on stdout
#######################################
echoError() {
  local text=$1
  echo -e "\033[31m${text}\033[0m"
}
#######################################
# Report a fatal error and terminate the script.
# Arguments: $1 - error message
# Outputs:   the message followed by "stop", both via echoError
# Returns:   never returns; exits with status 1
#######################################
errorExit() {
  echoError "$1"
  echoError "stop"
  # The function announced "stop" but used to return to the caller.
  exit 1
}
# Show the effective settings before validating them.
echoInfo "java home: ${JAVA}" 36
echoInfo "interval: ${INTERVAL}s" 36
echoInfo "log output path: ${LOG_PATH}" 36

# A target pid must have been specified.
if [[ -z "${PID}" ]]; then
  echoError "Pid is not specified, Please use the '-p' to specify"
  exit 1
fi

# The pid must belong to an existing process. Asking ps for that one pid
# avoids text-matching the whole process table, where a non-numeric value
# such as "PID" could match the header line.
if ! ps -p "${PID}" > /dev/null 2>&1; then
  echoError "Pid is not exist, Please re-enter"
  exit 1
fi

# Check the Java tooling: jstack is the tool the log function invokes, so
# test for it rather than for jps.
if [[ ! -x "${JAVA}/jstack" ]]; then
  echoError "Java Home is not found, please user the '-j' configure"
  exit 1
fi

echo "start monitoring pid : ${PID}"
#######################################
# Append the stack of one thread to the log file.
# By default 20 lines after the matching thread header are recorded; set
# STACK_LINES to change that.
# Globals:   JAVA, PID, LOG_PATH (read), STACK_LINES (read, optional)
# Arguments: $1 - native thread id in jstack notation, e.g. 0x55d0
# Outputs:   a progress line on stdout; one record appended to LOG_PATH
#######################################
log() {
  local nid=$1
  local stack

  echo "${nid}记录"

  # -w keeps nid=0x55d from also matching the header of thread nid=0x55d0.
  stack=$("${JAVA}/jstack" -l "${PID}" \
    | grep -w -A "${STACK_LINES:-20}" -- "nid=${nid}")

  {
    printf '>>>%s\n' "${nid}"
    date "+%Y-%m-%d %H:%M:%S"
    printf '%s\n' "${stack}"
    printf '%s\n' "---------------------------------------------------------------------------------------------------------"
  } >> "${LOG_PATH}"
}
#######################################
# Take one sample: find the threads of PID whose CPU usage is at or above
# CPU_MAX and log the stack of each of them.
# Globals:   PID, CPU_MAX, LOG_PATH (read)
# Arguments: none
# Outputs:   "nids is null" on stdout when no thread qualifies; otherwise a
#            separator line appended to LOG_PATH followed by one log record
#            per thread
# Returns:   0 when no thread qualifies
#######################################
watchCpu() {
  local tid nid
  local -a tids=() nids=()

  # One batch-mode top sample in thread view. The thread rows start at
  # line 8; column 1 is the thread id and column 9 the CPU percentage.
  # The threshold is passed with -v instead of being pasted into the awk
  # program text.
  mapfile -t tids < <(
    top -b -sHp "${PID}" -n 1 -i \
      | awk -v max="${CPU_MAX}" \
        'NR >= 8 && $1 ~ /^[0-9]+$/ && $9 + 0 >= max + 0 { print $1 }'
  )

  # jstack prints native thread ids as lower-case hex with a 0x prefix.
  for tid in "${tids[@]}"; do
    nids+=("$(printf '0x%x' "${tid}")")
  done

  if (( ${#nids[@]} == 0 )); then
    echo "nids is null"
    # "continue" is not valid here: there is no loop inside this function.
    return 0
  fi

  # The redirection used to read "2>$1", which expands to nothing and made
  # this command fail with "ambiguous redirect".
  echo "**************************split*****************************" >> "${LOG_PATH}" 2>&1

  for nid in "${nids[@]}"; do
    log "${nid}"
  done
}
# Sample forever: one watchCpu pass, then pause for INTERVAL seconds.
while true; do
  watchCpu
  # Stop when sleep fails (for example a non-numeric interval); otherwise
  # the loop would spin without any pause between samples.
  if ! sleep "${INTERVAL}"; then
    echoError "sleep failed for interval '${INTERVAL}', stop"
    exit 1
  fi
done
脚本文件:watch.sh
1、文件赋权
chmod 777 watch.sh
2、文件执行
nohup ./watch.sh -p 11291 -c 20
解释:
-p 监控进程id为11291的Java程序
-c 线程cpu占用率超过20的打印输出堆栈信息
占用率以 “top -Hp 11291” 输出中的 %CPU 列为参考
日志信息示例
>>>0x55d0
2022-09-30 23:30:54
"http-nio-9090-exec-10" #27 daemon prio=5 os_prio=0 tid=0x00007f8520f50000 nid=0x55d0 waiting on condition [0x00007f84ed9b2000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000edc53a20> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at org.apache.tomcat.util.threads.TaskQueue.take(TaskQueue.java:120)
at org.apache.tomcat.util.threads.TaskQueue.take(TaskQueue.java:33)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.lang.Thread.run(Thread.java:748)

Locked ownable synchronizers:
- None

"http-nio-9090-exec-9" #26 daemon prio=5 os_prio=0 tid=0x00007f852101a000 nid=0x55cf waiting on condition [0x00007f84edab3000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
---------------------------------------------------------------------------------------------------------
>>>0x55d1
2022-09-30 23:30:54
"http-nio-9090-Poller" #28 daemon prio=5 os_prio=0 tid=0x00007f85210c2000 nid=0x55d1 runnable [0x00007f84ed8b1000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
- locked <0x00000000edc53850> (a sun.nio.ch.Util$3)
- locked <0x00000000edc53860> (a java.util.Collections$UnmodifiableSet)
- locked <0x00000000edc53808> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)
at org.apache.tomcat.util.net.NioEndpoint$Poller.run(NioEndpoint.java:788)
at java.lang.Thread.run(Thread.java:748)

Locked ownable synchronizers:
- None

"http-nio-9090-exec-10" #27 daemon prio=5 os_prio=0 tid=0x00007f8520f50000 nid=0x55d0 waiting on condition [0x00007f84ed9b2000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000edc53a20> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
---------------------------------------------------------------------------------------------------------