Linux下性能预警及应急处理
自己编写一个脚本程序,该程序在工作时段(8:00-22:00),每隔一个小时执行一次;在其他时段每隔两小时执行一次。
每次执行都要完成如下任务:
1、检测进程CPU占用率,如果超出30%开始预警,超出80%就把该进程取消;
2、检测进程内存占用率,如果超出30%开始预警,超出70%就把该进程取消;
3、检测所有磁盘分区使用情况,如果超出90%就开始预警;
4、检测所有进程,如果发现僵尸进程,就人为取消该进程;
说明:所谓的预警就是把事件的性质、发生时间以及被处理掉的进程等重要信息统一输出到某个日志文件中。
一、[root@Shawn-T2 ~]# crontab -e
加入以下命令(分钟字段必须写 0,否则会每分钟执行一次;cron 的小时范围不能跨越午夜,所以夜间时段写成 0-6/2;脚本使用了 bash 语法,且保存在 root 的主目录下): 0 0-6/2,8-22 * * * /bin/bash /root/monitor.sh
二、[root@Shawn-T2 ~]# vim monitor.sh
添加以下内容:
#!/bin/bash
# This script is used for monitoring system load.
#
# Process checks performed on every run:
#   - CPU usage    >= 80%: the process is killed;  >= 30%: a warning is logged.
#   - Memory usage >= 70%: the process is killed;  >= 30%: a warning is logged.
#   - Zombie processes (STAT starting with "Z") are sent SIGKILL.
# Every event is appended, prefixed with a timestamp, to /var/log/monitor.log.

# Timestamp shared by all log entries of this run; the disk check later in
# this script reads it as well, so the variable name must stay "time".
time=$(date +"%Y-%m-%d %H:%M:%S")
log_file=/var/log/monitor.log

# Append one timestamped event line to the log file.
log_event() {
  printf '%s\n' "$time $1" >> "$log_file"
}

# ps is asked for exactly the columns that are needed. Each column gets an
# empty header (separate -o options), so no header line is printed and no
# process has to be filtered out by text matching. LC_ALL=C guarantees a "."
# decimal separator. The loop reads straight from ps, so no temp file is used.
while read -r pid cpu mem stat cmd _; do
  # Ignore anything that is not a well-formed process line.
  [[ "$pid" =~ ^[0-9]+$ ]] || continue
  # Never act on the monitor itself.
  [[ "$pid" == "$$" ]] && continue

  # Keep only the integer part of the percentages.
  cpu_pct=${cpu%%.*}
  mem_pct=${mem%%.*}
  [[ "$cpu_pct" =~ ^[0-9]+$ ]] || cpu_pct=0
  [[ "$mem_pct" =~ ^[0-9]+$ ]] || mem_pct=0

  # Monitor CPU load.
  if (( cpu_pct >= 80 )); then
    kill -9 "$pid"
    log_event "Process $cmd occupies too large CPU space and was killed!"
    # The process is gone; the remaining checks would only signal a dead PID.
    continue
  elif (( cpu_pct >= 30 )); then
    log_event "Process $cmd occupies too large ,please check it!"
  fi

  # Monitor Memory load.
  if (( mem_pct >= 70 )); then
    kill -9 "$pid"
    log_event "Memory $cmd occupies too large memory space and was killed!"
    continue
  elif (( mem_pct >= 30 )); then
    log_event "Memory $cmd occupies too large ,please check it!"
  fi

  # Monitor the zombie process. The STAT column carries extra flags after the
  # state letter (e.g. "Zs", "Z+"), so only the first character is compared.
  if [[ "$stat" == Z* ]]; then
    # NOTE(review): a zombie has already exited, so this signal cannot remove
    # its process-table entry; only its parent reaping it does. Consider
    # notifying or restarting the parent instead.
    kill -9 "$pid"
    log_event "Process $cmd is zombie process and was killed."
  fi
done < <(LC_ALL=C ps -e -o pid= -o pcpu= -o pmem= -o stat= -o args=)
# Monitor the disk partition used space.

#######################################
# Print one warning line for every file system whose usage reaches a limit.
# Globals:   time (read) - timestamp prefix; the current time is used when
#            it is unset or empty
# Arguments: $1 - usage limit in percent (default 90)
# Inputs:    stdin - output of `df -P` (one header line, then one line per
#            file system)
# Outputs:   warning lines on stdout, one per file system at or above the limit
#######################################
report_full_partitions() {
  local limit=${1:-90}
  local stamp=${time:-$(date +"%Y-%m-%d %H:%M:%S")}

  awk -v ts="$stamp" -v limit="$limit" '
    NR > 1 && NF >= 6 {
      pct = $5
      sub(/%$/, "", pct)
      if (pct + 0 >= limit + 0) {
        # A mount point may contain spaces: it is everything from field 6 on.
        mount = $6
        for (i = 7; i <= NF; i++) {
          mount = mount " " $i
        }
        printf "%s The partitipn which the directory %s mounted on occupies too large space ,please check it.\n", ts, mount
      }
    }'
}

# -P keeps every file system on a single line even when the device name is
# long, and LC_ALL=C keeps the header and number format locale independent.
disk_warnings=$(LC_ALL=C df -P | report_full_partitions 90)
# Only touch the log when there is something to report.
if [[ -n "$disk_warnings" ]]; then
  printf '%s\n' "$disk_warnings" >> /var/log/monitor.log
fi