Linux下性能预警及应急处理
自己编写一个脚本程序,该程序在工作时段(8:00-22:00),每隔一个小时执行一次;在其他时段每隔两小时执行一次。
每次执行都要完成如下任务:
1、检测进程CPU占用率,如果超出30%开始预警,超出80%就把该进程取消;
2、检测进程内存占用率,如果超出30%开始预警,超出70%就把该进程取消;
3、检测所有磁盘分区使用情况,如果超出90%就开始预警;
4、检测所有进程,如果发现僵尸进程,就人为取消该进程;
说明:所谓的预警就是把事件的性质、发生时间以及被处理掉的进程等重要信息统一输出到某个日志文件中。

一、[root@Shawn-T2 ~]# crontab -e
    加入以下命令:  0  8-22,0-6/2  *  *  *  /bin/bash  /root/monitor.sh
二、[root@Shawn-T2 ~]# vim monitor.sh
    添加以下内容:
#!/bin/bash
# This script is used for monitoring system load.
#
# Process checks (a single pass over the output of `ps aux`):
#   - %CPU >= 80: send SIGKILL and log it;  %CPU >= 30: log a warning.
#   - %MEM >= 70: send SIGKILL and log it;  %MEM >= 30: log a warning.
#   - state starting with "Z" (zombie): send SIGKILL and log it.
# Every event is appended to /var/log/monitor.log.

# Timestamp written on every log entry of this run. It stays a plain global
# named "time" because the disk check further down in the script reads it too.
time=$(date +"%Y-%m-%d %H:%M:%S")
process_log=/var/log/monitor.log

# Thresholds, in whole percent (the fractional part of ps' figures is dropped).
cpu_kill=80
cpu_warn=30
mem_kill=70
mem_warn=30

# Regex for a plain decimal integer; kept in a variable so it can be used
# unquoted on the right-hand side of =~.
digits='^[0-9]+$'

# Append one already-formatted event line to the process log.
log_event() {
  printf '%s\n' "$1" >> "$process_log"
}

# `ps aux` columns: USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND...
# Reading straight from the pipe avoids a shared, predictable temp file; the
# loop only appends to the log, so running it in a subshell loses nothing.
ps aux | while read -r _user pid cpu mem _vsz _rss _tty stat _start _cputime proc_name _args
do
  # The header row has "PID" in the pid column; skipping non-numeric pids
  # drops it without also dropping processes that merely mention "USER".
  [[ $pid =~ $digits ]] || continue

  # Never act on the monitor itself (main shell or this pipeline subshell).
  if (( pid == $$ || pid == BASHPID )); then
    continue
  fi

  # Integer part of the percentages; anything unparsable counts as 0.
  cpu_pct=${cpu%%.*}
  mem_pct=${mem%%.*}
  [[ $cpu_pct =~ $digits ]] || cpu_pct=0
  [[ $mem_pct =~ $digits ]] || mem_pct=0

  # Set once SIGKILL has been sent, so a process that trips several rules is
  # signalled only once while every rule still leaves its own log entry.
  killed=0

  # Monitor CPU load.
  if (( cpu_pct >= cpu_kill )); then
    kill -9 "$pid"
    killed=1
    log_event "$time  Process $proc_name occupies too large CPU space and was killed!"
  elif (( cpu_pct >= cpu_warn )); then
    log_event "$time  Process $proc_name occupies too large ,please check it!"
  fi

  # Monitor Memory load.
  if (( mem_pct >= mem_kill )); then
    if (( ! killed )); then
      kill -9 "$pid"
      killed=1
    fi
    log_event "$time  Memory $proc_name occupies too large memory space and was killed!"
  elif (( mem_pct >= mem_warn )); then
    log_event "$time  Memory $proc_name occupies too large ,please check it!"
  fi

  # Monitor the zombie process. The STAT column carries modifier characters
  # after the state letter (e.g. "Zs", "Z+"), so match on the prefix.
  # NOTE(review): a zombie has already exited, so SIGKILL normally has no
  # effect until its parent reaps it -- confirm whether the parent should be
  # signalled instead.
  if [[ $stat == Z* ]]; then
    if (( ! killed )); then
      kill -9 "$pid"
    fi
    log_event "$time Process $proc_name is zombie process and was killed."
  fi
done

# Monitor the disk partition used space.

#######################################
# Append a warning to a log file for every filesystem that is too full.
# Globals:   time (read) - timestamp for the log entries; when it is unset or
#            empty the current date/time is used instead.
# Arguments: $1 - log file to append to
#            $2 - usage threshold in whole percent (optional, default 90)
# Inputs:    `df -P` output on stdin, with or without its header row
# Outputs:   one line per filesystem at or above the threshold, appended to $1
#######################################
report_full_partitions() {
  local log_file=$1
  local limit=${2:-90}
  local stamp=${time:-$(date +"%Y-%m-%d %H:%M:%S")}
  # Capacity column looks like "95%"; kept in a variable for unquoted =~ use.
  local capacity_re='^([0-9]+)%$'
  local _fs _blocks _used _avail capacity mount_point
  local -i used_pct

  # The last variable receives the rest of the line, so a mount point that
  # contains spaces is kept whole.
  while read -r _fs _blocks _used _avail capacity mount_point; do
    # The header row and rows whose capacity is "-" do not look like "N%".
    [[ $capacity =~ $capacity_re ]] || continue
    # Force base 10 so a value with a leading zero is not read as octal.
    used_pct=$((10#${BASH_REMATCH[1]}))
    if (( used_pct >= limit )); then
      printf '%s\n' "$stamp The partitipn which the directory $mount_point mounted on occupies too large space ,please check it." >> "$log_file"
    fi
  done
}

# -P selects the POSIX output format: one line per filesystem, so long device
# names are not wrapped onto a second line. Piping straight into the function
# avoids a shared, predictable temp file.
df -P | report_full_partitions /var/log/monitor.log