(一)、服务cpu巡检
# CPU inspection script.
# Appends one report per run to logs/YYYYMMDD.log next to this script: the
# user-CPU figure reported by top, memory usage and, when that CPU figure
# exceeds CPU_THRESHOLD, the busiest process together with its hottest thread
# and the matching jstack thread lines.

# WARNING threshold: whole percent, compared with the integer part of the
# CPU figure.
CPU_THRESHOLD=2

#######################################
# Extracts the first value of the "Cpu(s)" summary line (user CPU).
# Arguments: none; reads `top -b` output on stdin
# Outputs:   the value as printed by top (e.g. "1.5"), nothing if not found
#######################################
parse_cpu_user() {
  awk '/Cpu\(s\)/ {
    # Remove the label first: when the value fills its column
    # (e.g. "%Cpu(s):100.0 us") no blank separates label and number.
    sub(/^[^:]*:/, "")
    value = $1
    sub(/%.*$/, "", value)   # older layout prints "1.5%us,"
    print value
    exit
  }'
}

#######################################
# Prints "<tid> <cpu%>" for the first row below the column header.
# The %CPU column is located through the header instead of a fixed
# line/field number, so the summary area may have any number of lines.
# Arguments: none; reads `top -H -b` output on stdin
# Outputs:   "<tid> <cpu%>", nothing if no header or no row is present
#######################################
hottest_thread() {
  awk '
    in_rows { print $1, $cpu_col; exit }
    $1 == "PID" {
      for (i = 1; i <= NF; i++) if ($i == "%CPU") cpu_col = i
      in_rows = (cpu_col > 0)
    }
  '
}

#######################################
# Writes the hottest thread of a process and its jstack lines to stdout.
# Arguments: $1 - PID of the process to examine
# Returns:   0 always; prints nothing when the process cannot be examined
#######################################
log_hot_thread() {
  local pid=$1 tid cpu tid_hex cwd

  [ -n "$pid" ] || return 0
  # Batch mode (-b -n 1): plain top needs a terminal and never terminates
  # on its own, which fails when started from cron.
  read -r tid cpu < <(LC_ALL=C top -H -b -n 1 -p "$pid" | hottest_thread)
  case $tid in
    '' | *[!0-9]*) return 0 ;;  # process already gone or row not parsable
  esac

  tid_hex=$(printf '%x' "$tid")
  cwd=$(readlink "/proc/$pid/cwd")
  echo "捕捉到CPU飙高服务进程:${pid},线程:${tid}(%x:${tid_hex}),当前线程cpu:${cpu},服务位置:${cwd}"

  if command -v jstack > /dev/null 2>&1; then
    # Match the native thread id exactly instead of the bare hex string,
    # which would also hit addresses and other ids containing it.
    # NOTE(review): the decimal alternative is for JDKs that print nid in
    # decimal - confirm against the JDK in use; it is harmless otherwise.
    jstack -l "$pid" | grep -E -- "nid=(0x${tid_hex}|${tid})([^0-9a-f]|\$)"
  fi
}

#######################################
# Entry point: collects the metrics and appends the report to the log.
# Globals:   CPU_THRESHOLD (read)
# Returns:   0 on success, 1 if the log directory or the CPU figure is
#            unavailable
#######################################
inspect_cpu() {
  local now log_dir log_file cpu_use cpu_int top_pid
  local mem_total mem_used mem_rate
  local number_re='^[0-9]+(\.[0-9]+)?$'

  now=$(date "+%Y-%m-%d %H:%M:%S")
  log_dir="$(cd "$(dirname "$0")" && pwd)/logs"
  mkdir -p "$log_dir" || return 1
  log_file="$log_dir/$(date +%Y%m%d).log"

  cpu_use=$(LC_ALL=C top -b -n 1 | parse_cpu_user)
  if [[ ! $cpu_use =~ $number_re ]]; then
    # Without a number the comparison below would error out and the run
    # would be reported as NORMAL.
    printf 'cpu inspection: cannot read CPU usage from top\n' >&2
    return 1
  fi
  cpu_int=${cpu_use%%.*}

  read -r mem_total mem_used \
    < <(LC_ALL=C free | awk '/^Mem:/ { print $2, $3; exit }')
  mem_rate=$(awk -v used="$mem_used" -v total="$mem_total" 'BEGIN {
    if (total + 0 > 0) printf "%.1f%%\n", used / total * 100
    else print "N/A"
  }')

  # PID of the process with the highest CPU usage.
  top_pid=$(ps -eo pid= --sort=-pcpu | awk 'NR == 1 { print $1 }')

  {
    printf '%s\n' "$top_pid"
    echo "----------------start------------------"
    echo "当前时间:${now}"
    echo "当前cpu使用率:${cpu_use}"
    echo "当前服务器内存:${mem_total},已使用:${mem_used},当前内存占用率:${mem_rate}"
    if [ "$cpu_int" -gt "$CPU_THRESHOLD" ]; then
      echo 'result:WARNING'
      echo "详情信息:"
      ps -eo pid,pcpu,pmem,args --sort=-pcpu | head -n 5
      ps -eo pid,pcpu,pmem,args --sort=-pmem | head -n 3
      log_hot_thread "$top_pid"
    else
      echo "result:NORMAL"
    fi
    echo "----------------end------------------"
    echo ''
    echo ''
  } >> "$log_file"
}

inspect_cpu
(二)、内存巡检
# Memory inspection script.
# Prints the `free -m` figures to stdout and appends a report (ten largest
# memory consumers, the same figures and a verdict) to
# /mnt/memory_YYYY-MM-DD.log.

# Usage percentage at or above which the verdict is "危险".
MEM_RATE_THRESHOLD=50

#######################################
# Prints used/total as a whole percentage (truncated, 0..100 for used<=total).
# Arguments: $1 - used amount, $2 - total amount (non-negative integers)
# Outputs:   the percentage without a "%" sign
# Returns:   1, printing nothing, if an argument is not an integer or the
#            total is zero
#######################################
mem_usage_percent() {
  local used=$1 total=$2
  local int_re='^[0-9]+$'

  [[ $used =~ $int_re && $total =~ $int_re ]] || return 1
  # 10# forces base 10 so a leading zero is never read as octal.
  (( 10#$total > 0 )) || return 1
  printf '%d\n' "$(( 10#$used * 100 / 10#$total ))"
}

#######################################
# Entry point: gathers the figures, prints them and appends the report.
# Globals:   MEM_RATE_THRESHOLD (read)
# Returns:   0 on success, 1 if the `free -m` output cannot be parsed
#######################################
inspect_memory() {
  local total used unused shared buff avail rate summary
  local file_name current_time

  # A single `free` call, so every figure comes from the same snapshot.
  # NOTE(review): columns 6 and 7 are buff/cache and available in current
  # procps; older releases print buffers and cached there - confirm on the
  # target host. The column labels below are kept as they were.
  read -r total used unused shared buff avail \
    < <(LC_ALL=C free -m | awk 'NR == 2 { print $2, $3, $4, $5, $6, $7 }')

  if ! rate=$(mem_usage_percent "$used" "$total"); then
    printf 'memory inspection: cannot parse `free -m` output\n' >&2
    return 1
  fi

  summary=$(printf 'total\tused\tfree\tshared\tbuffer\tavailable\n%sM\t%sM\t%sM\t%sM\t%sM\t%sM\nrate:%s%%' \
    "$total" "$used" "$unused" "$shared" "$buff" "$avail" "$rate")
  printf '%s\n' "$summary"

  file_name="/mnt/memory_$(date "+%Y-%m-%d").log"
  current_time=$(date "+%Y-%m-%d %H:%M:%S")
  {
    echo "${current_time}巡检当前服务内存状况"
    echo "USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND"
    # Let ps sort and omit its header; filtering the header with grep would
    # also drop every process whose command line contains "PID".
    ps aux --no-headers --sort=-pmem | head -n 10
    echo " "
    echo "---------------------"
    printf '%s\n' "$summary"
    echo "------本次巡检结束---"
    if [ "$rate" -ge "$MEM_RATE_THRESHOLD" ]; then
      echo "危险"
    else
      echo "正常"
    fi
    echo "---------------------"
    echo ""
    echo ""
  } >> "$file_name"
}

inspect_memory
(三)、编辑秒级定时任务
# Second-level scheduling: cron's finest granularity is one minute, so the
# monitor is started twelve times per minute, each start delayed by a further
# five seconds.
# The entries carry no user column, i.e. they are in per-user crontab format
# and are installed with crontab(1). /etc/crontab is left untouched: its
# lines need an extra user column, and loading that file with `crontab`
# would replace the user's own entries with the system table.
# Existing entries of the current user are kept; earlier /mnt/monitor.sh
# lines are dropped first, so running this again does not duplicate them.
{
  crontab -l 2>/dev/null | grep -vF '/mnt/monitor.sh'
  cat <<'EOF'
* * * * * /mnt/monitor.sh
* * * * * sleep 5; /mnt/monitor.sh
* * * * * sleep 10; /mnt/monitor.sh
* * * * * sleep 15; /mnt/monitor.sh
* * * * * sleep 20; /mnt/monitor.sh
* * * * * sleep 25; /mnt/monitor.sh
* * * * * sleep 30; /mnt/monitor.sh
* * * * * sleep 35; /mnt/monitor.sh
* * * * * sleep 40; /mnt/monitor.sh
* * * * * sleep 45; /mnt/monitor.sh
* * * * * sleep 50; /mnt/monitor.sh
* * * * * sleep 55; /mnt/monitor.sh
EOF
} | crontab -
# Show the installed table for verification.
crontab -l