系统工具箱脚本内容

#!/bin/bash
## System toolbox script.
## Checks currently implemented:
## system load; CPU usage; memory usage; swap usage; processes in D/R state;
## inode usage; directory sizes; system I/O usage; file descriptors.

##1. Inspect the first argument.
## When it is empty or is the word "help", print the usage text and leave
## with exit status 10.
case "$1" in
  "" | help)
    cat <<'EOF'
Usage: systatus [ options ] num1 num2
Options:
    all: print all parameters status
    clo: print "cpu load" status
    cuse: print "cpu used" status
    muse: print "memory used" status
    swu: print "swap used" status
    psdr: print "process status in D&&R" status
    ino: print "system inode used" status
    size: print "directory size status(/,secondary directory,top3)" status
    iot: print "io usage(disk,process)" status 
    fde: print "system file descriptor" status
num1: systools execution interval of time
num2: systools number of executions
EOF
    exit 10
    ;;
esac

##2. Validate the arguments; print a hint and exit 10 when they are not
##   acceptable.
##1). arrA: the arguments given on the command line (first 12 positions)
##2). arrB: every option name the script understands
# Globbing is switched off while arrA is built so that an argument such as
# "*" stays literal instead of being expanded to file names.
set -f
arrA=($1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} ${12})
set +f
arrB=(clo cuse muse swu psdr ino size iot fde all)

##3). The first argument must be an option name: stop when it is empty or
##    made of digits only.
case "$1" in
  *[!0-9]*) ;;
  *)
    echo "please print value"
    exit 10
    ;;
esac

##4). Every argument must be either a plain number or exactly one of the names
##    in arrB. The comparison is a literal string match; matching the argument
##    as a grep pattern would let inputs such as "..." or "c.o" slip through.
for i in "${arrA[@]}"; do
  case "$i" in
    *[!0-9]*) ;;
    *) continue ;;          # digits only: checked by the next step
  esac
  juvalue=""
  for opt in "${arrB[@]}"; do
    if [ "$i" = "$opt" ]; then
      juvalue=$opt
      break
    fi
  done
  if [ -z "$juvalue" ]; then
    echo "$i" is not exist
    exit 10
  fi
done

##5). Check the argument order and pick up the interval / repeat count.
##    Option names must come first; they may be followed by at most two
##    numbers: the interval (extime) and then the number of runs (lofre).
vanum=0
for j in "${arrA[@]}"; do
  case "$j" in
    *[!0-9]*)
      # an option name after a number is a format error
      if [ "$vanum" -ge 1 ]; then
        echo "print value error"
        exit 10
      fi
      ;;
    *)
      # a third number is a format error
      if [ "$vanum" -ge 2 ]; then
        echo "print value error"
        exit 10
      fi
      vanum=$((vanum + 1))
      if [ "$vanum" = 1 ]; then
        extime=$j
      else
        lofre=$j
      fi
      ;;
  esac
done

# Defaults: 5 for the interval and 5 runs when the numbers are omitted.
if [ "$vanum" = 0 ]; then
  extime=5
  lofre=5
elif [ "$vanum" = 1 ]; then
  lofre=5
fi

##3. Log files produced by this run
##1). Log that receives the system status report
logname="/var/log/systatus/systatus.log-$(date +%F-%H-%M-%S)"
##2). Log that records how the toolbox itself ran
toolslog="/var/sysadmin_toolbox/other/systools/logs/systool.log-$(date +%F)"

##3). Directory for the toolbox run log
##4). Directory for the status logs
##4. Working directory that holds the per-run temporary data
mkdir -p /var/sysadmin_toolbox/other/systools/logs \
  /var/log/systatus \
  /var/sysadmin_toolbox/other/systools/tmp

## PID of this script
pid_lo=$$

## Temporary directory of this run.
## mktemp adds a random suffix, so two runs started within the same second can
## no longer share (and later delete) each other's directory. The script stops
## here when the directory cannot be created, because every check writes its
## scratch files below $tempdir.
tempdir=$(mktemp -d "/var/sysadmin_toolbox/other/systools/tmp/systmp_$(date +%F-%H-%M-%S).XXXXXX")
if [ -z "$tempdir" ] || [ ! -d "$tempdir" ]; then
  echo "cannot create temporary directory in /var/sysadmin_toolbox/other/systools/tmp" >&2
  exit 10
fi

##5. Signal handling
##1). HUP: run a no-op so the script keeps going after the remote session
##    is closed
trap ":" 1

##2). TERM, TSTP and EXIT: remove the temporary files and report, on the
##    terminal and in the toolbox log, that the script stopped.
#######################################
# Cleanup/report handler for TERM, TSTP and EXIT.
# Globals read: tempdir, toolslog, logname, pid_lo
# Outputs: exit notice on stdout, one or two lines appended to $toolslog
# Exits the script with status 10.
#######################################
systools_on_exit() {
  # Disarm first: the "exit" at the end would otherwise fire the EXIT trap and
  # run this handler (and print everything) a second time.
  trap - TERM TSTP EXIT

  if [ -n "$tempdir" ]; then
    rm -rf -- "$tempdir"
  fi

  echo ""
  echo 'The script running is exit,please check log in details: '
  echo "$toolslog"
  echo "Warn:Systools exit running(pid: $pid_lo)" >> "$toolslog"
  if [ -f "$logname" ]; then
    echo "systatus log file: $logname"
    echo "systatus log location: $logname (pid: $pid_lo)" >> "$toolslog"
  fi
  # Always leave, even when a log write above failed.
  exit 10
}
trap systools_on_exit TERM TSTP EXIT

echo -e "\033[1;32m##############system status check begin###############\033[0m"
echo ""
echo "Systools is running begin,time: $(date +%F_%H:%M:%S) (pid: $pid_lo)" >> "$toolslog"
sleep 2

##5. Make sure enough disk space is left before anything is collected
##1). Stop when the root partition has 50 MB or less available
echo -e "\033[1msystools space check: \033[0m"
echo "systools space check:" >> "$toolslog"
# -P selects the POSIX layout (exactly one line per file system), so a long
# device name cannot wrap the record and shift the "available" column.
space=$(df -Pm / | awk 'NR==2 {print $4}')
if [ "$space" -le 50 ]; then
  echo "Root partition free space: !!abnormal!!"
  echo "Error: Root partition free space: abnormal (pid: $pid_lo)" >> "$toolslog"
  exit 10
else
  echo "Root partition free space: ok"
  echo "Root partition free space: ok (pid: $pid_lo)" >> "$toolslog"
fi

##2). Stop when the status-log directory has reached 51200 KB (50 MB)
if [ -d "/var/log/systatus/" ]; then
  # -k pins the unit to 1 KB blocks regardless of the environment
  dirsize=$(du -sk /var/log/systatus | awk '{print $1}')
  if [ "$dirsize" -ge 51200 ]; then
    echo "systatus log director size: !!abnormal!!"
    echo "Error: Systatus log director size: abnormal (pid: $pid_lo)" >> "$toolslog"
    exit 10
  else
    echo "systatus log director size: ok"
    echo "systatus log director size: ok (pid: $pid_lo)" >> "$toolslog"
  fi
fi
echo ""
sleep 2

##6. Look for earlier runs of this script that are still alive.
## The process list is filtered by the name "systatus"; the user decides
## whether the leftovers are stopped. USR1 is sent first, and kill -9 is used
## only when the process is still present five seconds later.
protmp="$tempdir/pro.tmp"
# Drop this script and its own children by comparing the PID and PPID columns
# exactly; a substring filter on the pid would also hide unrelated processes
# (pid 123 would hide 1234).
ps -ef | grep 'systatus' | grep -v 'grep' \
  | awk -v self="$pid_lo" '$2 != self && $3 != self' >> "$protmp"
pid_new=$(awk '{print $2}' "$protmp")
if [ -n "$pid_new" ]; then
  echo "old systatus process:"
  sed '/^$/d' "$protmp"
  read -r -p "process is exist,whether to kill[y/n]" jucon
  case "$jucon" in
    y)
      for i in $pid_new; do
        kill -USR1 "$i"
        sleep 5
        # Ask ps about this exact pid instead of grepping the number anywhere
        # in the listing.
        if ! ps -p "$i" > /dev/null 2>&1; then
          echo "pid $i: is stopped in single \"USR1\""
          echo "pid $i is stoped (single:USR1)" >> "$toolslog"
          sleep 2
        elif kill -9 "$i"; then
          # last resort, used only when USR1 did not end the process
          echo "$i is killed in single 9"
          echo "pid:$i is killed (single:9)" >> "$toolslog"
          sleep 2
        else
          echo "!!$i: is not killed,please check!!"
          echo "Error: pid $i not to killed" >> "$toolslog"
          sleep 2
        fi
      done
      ;;
    n)
      echo "continue to run $0"
      ;;
    *)
      echo 'print error'
      exit 10
      ;;
  esac
  echo ""
fi

## Timestamp shown later as "check time"
date=$(date +%F_%H:%M:%S)

##3. Log clean-up policy
## Status logs: remove regular files last modified one day ago or earlier.
## "-mtime +0" selects exactly what "-mtime 1 -o -mtime +1" was meant to select,
## but keeps "-type f" in force for every match, so directories are never
## handed to rm. find deletes the files itself, which also copes with unusual
## file names; -print only serves to detect whether anything was removed.
dellog=$(find /var/log/systatus -type f -mtime +0 -print -exec rm -f -- {} + 2>/dev/null)
if [ -n "$dellog" ]; then
  echo ""
  echo "Old systatus log clean finished"
  echo "Old systatus log clean finished" >> "$toolslog"
fi

## Toolbox run logs: remove regular files matched by -mtime +5.
dellogt=$(find /var/sysadmin_toolbox/other/systools/logs -type f -mtime +5 -print -exec rm -f -- {} + 2>/dev/null)
if [ -n "$dellogt" ]; then
  echo ""
  echo "Old systools run log clean finished"
  echo "Old systools run log clean finished" >> "$toolslog"
fi

##5. Print the basic facts of this run: check time, system version, pid, ip
##1). System version. Red Hat style and SuSE style release files are tried
##    first; /etc/os-release is the fallback for systems that ship neither.
if [ -f "/etc/redhat-release" ]; then
  sysver=$(cat /etc/redhat-release)
elif [ -f "/etc/SuSE-release" ]; then
  # the file name is case sensitive: SuSE-release
  sysver=$(head -1 /etc/SuSE-release)
elif [ -f "/etc/Suse-release" ]; then
  # spelling used by earlier versions of this script, kept for compatibility
  sysver=$(head -1 /etc/Suse-release)
elif [ -r "/etc/os-release" ]; then
  sysver=$(sed -n 's/^PRETTY_NAME=//p' /etc/os-release | tr -d '"' | head -1)
else
  sysver="unknown"
fi

##2). First non-loopback IPv4 address.
##    Older ifconfig versions print "inet addr:1.2.3.4", hence the sed.
if command -v ifconfig > /dev/null 2>&1; then
  sysip=$(ifconfig -a | grep -v 127.0.0.1 | grep inet | grep -v inet6 \
    | awk '{print $2}' | head -1 | sed 's/^addr://')
else
  sysip=$(hostname -I 2>/dev/null | awk '{print $1}')
fi

##3). Print the basic information
echo ""
echo -e "\033[1mbase information: \033[0m"
echo "check time: $date"
echo "run pid: $pid_lo"
echo "ip: $sysip"
echo "system version: $sysver" 
echo "" 
echo -e "\033[1mInspection items: \033[0m"
sleep 2

## Loop state.
## lonum is the number of the current round (compared with lofre below).
## swnum, sinum and finum are the "excute num" counters printed by the
## swu, size and fde checks.
lonum=1
swnum=1
sinum=1
finum=1
## Main loop: repeats while lonum <= lofre. The check functions are
## (re)defined inside the loop body; the matching "done" is far below.
while [ "$lonum" -le "$lofre" ];do
## Before every round: stop the script when the status log has reached the
## size limit (du -s value >= 20480; the message below calls this 20M), so
## repeated rounds cannot let the log grow without bound.
if [ -f "$logname" ];then
  filesize=`du -s $logname | awk '{print $1}'`
  if [ "$filesize" -ge 20480 ];then
    echo "!!Systatus log is more than 20M!!"
    echo "Error: Systatus log is more than 20M,exit to run script" >> $toolslog
    exit 10
  fi
fi

##6. The individual checks
##1). CPU load
#######################################
# Print the logical CPU count and the 1/5/15-minute load averages.
# Globals read:    logname
# Globals written: load1 load5 load15 log_cpu phy_cpu cpu_core ave_load
#                  load_com
# Outputs: report lines on stdout; the same lines are appended to $logname
#######################################
clo(){
  local upline

  ## Take ONE uptime sample and parse all three averages from it, so the
  ## values always belong together. LC_ALL=C keeps "." as the decimal mark;
  ## with a decimal comma the ", " field split below would cut the numbers.
  upline=$(LC_ALL=C uptime)
  load1=$(printf '%s\n' "$upline" | awk -F"[, ]+" '{print $(NF-2)}')
  load5=$(printf '%s\n' "$upline" | awk -F"[, ]+" '{print $(NF-1)}')
  load15=$(printf '%s\n' "$upline" | awk -F"[, ]+" '{print $NF}')

  ## Logical CPU count, physical CPU count, cores per CPU
  log_cpu=$(grep "processor" /proc/cpuinfo | wc -l)
  phy_cpu=$(grep "physical id" /proc/cpuinfo | sort | uniq | wc -l)
  cpu_core=$(grep "cpu cores" /proc/cpuinfo | uniq | awk -F":" '{print $2}')

  ## Mean of the three averages and whether it exceeds the logical CPU count
  ## (1 = yes, 0 = no). Neither value is printed by this function.
  ave_load=$(printf "%.2f" "$(echo "scale=2;((${load1}+${load5}+${load15})/3)" | bc)")
  load_com=$(echo "$ave_load > $log_cpu" | bc)

  echo -e "\033[1;35m-------------check cpu load------------\033[0m"
  sleep 2
  echo "###cpu load status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)" | tee -a "$logname"
  sleep 2

  ## Report body, written to the terminal and mirrored into the log
  {
    echo ""
    echo "logical cpu num: $log_cpu"
    echo "cpu load status: 1m $load1, 5m $load5, 15m $load15"
    echo ""
  } | tee -a "$logname"
}

##2). CPU usage
#######################################
# Report overall CPU utilisation taken from "sar 1 3" and list the ten
# threads that currently use the most CPU.
# Globals read:    tempdir, logname
# Globals written: cpufile sarfile cpu_user cpu_nice cpu_sys cpu_io cpu_st
#                  cpu_used cpu_level
# Outputs: report on stdout, mirrored into $logname
#######################################
cuse(){
  local row_fmt="%-20s %-10s %-10s %-10s %-10s %-10s\n"
  local f_a f_b f_user f_nice f_sys f_io f_steal f_rest
  local t_pid t_tid t_stime t_pcpu t_name t_rest

  echo -e "\033[1;35m-------------check cpu used------------\033[0m"
  sleep 2
  echo "###cpu used status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)"
  echo "time: $(date +%F_%H:%M:%S)" >> "$logname"
  echo "" | tee -a "$logname"

  ## Scratch files; both start with a single empty line
  cpufile="$tempdir/cuse.tmp"
  sarfile="$tempdir/sar.tmp"
  echo "" > "$cpufile"
  echo "" > "$sarfile"

  ## Sample with sar and split the last line of its output; columns 3..7 are
  ## used as %user, %nice, %sys, %iowait and %steal.
  sar 1 3 >> "$sarfile"
  read -r f_a f_b f_user f_nice f_sys f_io f_steal f_rest < <(tail -1 "$sarfile")
  cpu_user=$(printf "%.1f" $f_user)
  cpu_nice=$(printf "%.1f" $f_nice)
  cpu_sys=$(printf "%.1f" $f_sys)
  cpu_io=$(printf "%.1f" $f_io)
  cpu_st=$(printf "%.1f" $f_steal)
  cpu_used=$(printf "%.1f" $(echo "scale=2;($cpu_user+$cpu_nice+$cpu_sys+$cpu_io+$cpu_st)" | bc))

  ## Grade the total: below 50 normal, 50-60, 60-70, 70 and above.
  ## Checked from the highest band downwards.
  if [ "$(echo "$cpu_used >= 70" | bc)" -eq 1 ]; then
    cpu_level="!!!extremely high!!!"
  elif [ "$(echo "$cpu_used >= 60" | bc)" -eq 1 ]; then
    cpu_level="!!high!!"
  elif [ "$(echo "$cpu_used >= 50" | bc)" -eq 1 ]; then
    cpu_level="!relatively high!"
  else
    cpu_level="normal"
  fi

  ## Summary
  {
    echo "CPU usage(all): $cpu_used%"
    echo "Composition: %user $cpu_user, %nice $cpu_nice, %sys $cpu_sys, %io $cpu_io %steal $cpu_st"
    echo ""
    echo "detection result: $cpu_level"
    echo ""
  } | tee -a "$logname"
  sleep 2

  ## Ten busiest threads, appended to the scratch file and then printed as
  ## name, tid, pid, start time, cpu share.
  ps H -eo pid,tid,stime,%cpu,comm --no-headers --sort -%cpu | head -10 >> "$cpufile"
  {
    echo "Top 10 cpu usage(current):"
    printf "$row_fmt" "NAME(THREAD)" "TID" "PID" "STIME" "%CPU"
    while read -r t_pid t_tid t_stime t_pcpu t_name t_rest; do
      # skip the empty first line and anything without a tid column
      [ -n "$t_tid" ] || continue
      printf "$row_fmt" $t_name $t_tid $t_pid $t_stime $t_pcpu%
    done < "$cpufile"
    echo ""
  } | tee -a "$logname"
}


##3). Memory usage
#######################################
# Report the memory utilisation percentage and the ten processes with the
# largest memory share.
# Used memory is counted as the "used" figure of free plus Shmem from
# /proc/meminfo (the accounting the monitoring system uses, according to the
# original author).
# Globals read:    tempdir, logname
# Globals written: freefile memfile usedMem1 freeMem shmem usedMem totalMem
#                  usedMemPct freeMemPct mem_level
# Outputs: report on stdout, mirrored into $logname
#######################################
muse(){
  local row_fmt="%-20s %-10s %-10s %-10s %-10s\n"
  local p_pid p_stime p_pmem p_name p_rest

  ## Scratch files; both start with a single empty line
  freefile="$tempdir/free.tmp"
  memfile="$tempdir/psmem.tmp"
  echo "" > "$freefile"
  echo "" > "$memfile"

  echo -e "\033[1;35m-----------check memory used-----------\033[0m"
  sleep 2
  echo "###memory used status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)"
  echo "time: $(date +%F_%H:%M:%S)" >> "$logname"
  sleep 1
  echo "" | tee -a "$logname"

  ## Raw data: free output and the ten processes with the highest %mem
  free -k >> "$freefile"
  ps -eo pid,stime,%mem,comm --no-headers --sort -%mem | head -10 >> "$memfile"

  ## When free prints a "cache:" row (CentOS 6 / SuSE layout) the used/free
  ## figures come from that row, otherwise (RHEL 7 layout) from "Mem:".
  if grep -q "cache:" "$freefile"; then
    usedMem1=$(awk '/cache:/ {print $3}' "$freefile")
    freeMem=$(awk '/cache:/ {print $4}' "$freefile")
  else
    usedMem1=$(awk '/Mem:/ {print $3}' "$freefile")
    freeMem=$(awk '/Mem:/ {print $4}' "$freefile")
  fi
  shmem=$(cat /proc/meminfo | grep -w Shmem | awk '{print $2}')
  usedMem=$(expr $usedMem1 + $shmem)
  totalMem=$(awk '/Mem:/ {print $2}' "$freefile")
  usedMemPct=$(expr $usedMem \* 100 / $totalMem)
  freeMemPct=$(expr 100 - $usedMemPct)

  ## Grade the percentage: below 50 normal, 50-60, 60-70, 70 and above.
  ## Checked from the highest band downwards.
  if [ "$usedMemPct" -ge 70 ]; then
    mem_level="!!!extremely high!!!"
  elif [ "$usedMemPct" -ge 60 ]; then
    mem_level="!!high!!"
  elif [ "$usedMemPct" -ge 50 ]; then
    mem_level="!relatively high!"
  else
    mem_level="normal"
  fi

  {
    echo "memory usage: ${usedMemPct}%"
    echo "detection result: $mem_level"
    echo ""
  } | tee -a "$logname"
  sleep 2

  ## Top-10 table: name, pid, start time, memory share
  {
    echo "Top 10 memory usage:"
    printf "$row_fmt" "NAME(process)" "PID" "STIME" "%MEM"
    while read -r p_pid p_stime p_pmem p_name p_rest; do
      # skip the empty first line of the scratch file
      [ -n "$p_pid" ] || continue
      printf "$row_fmt" $p_name $p_pid $p_stime $p_pmem%
    done < "$memfile"
    echo ""
  } | tee -a "$logname"
}

## Swap usage
#######################################
# Report whether swap is enabled, how much of it is used, the swap-in/out
# figures from vmstat and, at most once per 30 minutes, the ten processes
# holding the most swap.
# Globals read:    tempdir, logname, swnum, oldsta
# Globals written: swapfile pswapfile swato swaused swapuser vmst swin swout
#                  swapsta swstan swcheck curstm difstm swexc oldsta swnum
# Returns: 1 when it stops early (swap disabled, usage 0.0, or the per-process
#          scan ran less than 30 minutes ago)
#######################################
swu(){
  swapfile="$tempdir/swa.tmp"
  pswapfile="$tempdir/pswa.tmp"

  echo -e "\033[1;35m------------check swap used------------\033[0m"
  sleep 2
  echo "###swap used status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)"
  echo "time: $(date +%F_%H:%M:%S)" >> "$logname"
  sleep 2
  echo "" | tee -a "$logname"

  echo "" > "$swapfile"
  echo "" > "$pswapfile"

  ## Total swap size: column 2 of the last line printed by free
  free -k >> "$swapfile"
  swato=$(tail -1 "$swapfile" | awk '{print $2}')

  ## Swap disabled: say so and stop
  if [ "$swato" = 0 ]; then
    swapsta="--close--"
    swstan=0
    echo "swap status: $swapsta"
    echo "" | tee -a "$logname"
    return 1
  fi

  ## Swap enabled: used size, usage percentage, swap in/out from vmstat
  swaused=$(tail -1 "$swapfile" | awk '{print $3}')
  swapuser=$(printf "%.1f" "$(echo "scale=3;($swaused/$swato)*100" | bc)" 2>/dev/null)
  vmst=$(vmstat)
  swin=$(echo "$vmst" | tail -1 | awk '{print $7}')
  swout=$(echo "$vmst" | tail -1 | awk '{print $8}')
  swapsta="open"
  swstan=1

  echo "swap status: $swapsta"
  echo "swap total: $(free -h | tail -1 | awk '{print $2}')"
  echo "swap total: $(free -h | tail -1 | awk '{print $2}')" >> "$logname"
  echo "swap used: $swapuser%" | tee -a "$logname"

  ## Grade the usage: below 20 normal, 20-30, 30-40, 40 and above.
  ## Checked from the highest band downwards.
  if [ "$(echo "$swapuser >= 40" | bc)" -eq 1 ]; then
    swcheck="!!!extremely high!!!"
  elif [ "$(echo "$swapuser >= 30" | bc)" -eq 1 ]; then
    swcheck="!!high!!"
  elif [ "$(echo "$swapuser >= 20" | bc)" -eq 1 ]; then
    swcheck="!relatively high!"
  else
    swcheck="normal"
  fi

  {
    echo "detection result: $swcheck"
    echo ""
    echo "swap in(kb/s) $swin, swap out(kb/s) $swout"
    echo ""
  } | tee -a "$logname"

  ## Nothing in swap: no per-process breakdown
  if [ "$swapuser" = '0.0' ]; then
    echo "" | tee -a "$logname"
    return 1
  fi

  sleep 2

  ## The per-process scan is skipped when the previous one (time stamp in
  ## oldsta) was less than 1800 seconds ago.
  if [ "$swnum" -gt 1 ]; then
    curstm=$(date -d "$(date)" +%s)
    difstm=$(printf "%.0f" $(echo "$curstm-$oldsta" | bc))
    if [ "$(echo "$difstm >= 1800" | bc)" = 0 ]; then
      {
        echo "process swap usage check time interval less than 30min in last check"
        echo ""
      } | tee -a "$logname"
      return 1
    fi
  fi

  ## Print one line per process directory in /proc: name, pid, start time,
  ## swap held (MB) and that amount as a percentage of total swap.
  ## The amount is the sum of column 2 of every smaps line containing "Swap".
  getswap(){
    SUM=0
    swato=$(free -m | tail -1 | awk '{print $2}')
    for DIR in $(find /proc/ -maxdepth 1 -type d | grep -E "^/proc/[0-9]"); do
      PID=${DIR##*/}
      PROGNAME=$(ps -p $PID -o comm --no-headers)
      for SWAP in $(grep Swap $DIR/smaps 2>/dev/null | awk '{ print $2 }'); do
        SUM=$((SUM + SWAP))
      done
      SUM_M=$(printf "%.1f" "$(echo "scale=4;($SUM/1024)" | bc)")
      swrtime=$(ps -A -o pid,stime | awk -v a="$PID" '$1==a {print $2}')
      printf "%-20s %-10s %-10s %-13s %-15s\n" "$PROGNAME" "$PID" "$swrtime" "${SUM_M}M" "$(printf "%.1f" "$(echo "scale=4;($SUM_M/$swato)*100" | bc)")%"
      SUM=0
    done
  }
  swexc=$(getswap | sort -nr -k4 | head -10)

  {
    echo "Top 10 in swap usage:"
    echo "excute num: $swnum"
    printf "%-20s %-10s %-10s %-10s %-15s\n" "NAME(PROCESS)" "PID" "STIME" "USED_VALUE(M)" "USED(%)"
    echo "$swexc"
    echo ""
  } | tee -a "$logname"

  ## Remember when this scan finished and count it
  oldsta=$(date -d "$(date)" +%s)
  swnum=$((swnum + 1))
}

## Processes in D and R state
#######################################
# List the processes whose /proc/<pid>/status contains "disk sleep" (D state)
# or "running" (R state), with start time and wchan.
# Globals read:    tempdir, logname
# Globals written: psdfile psrfile tonumd tonumr and the variables set by the
#                  helper functions statd / statr
# Outputs: report on stdout, mirrored into $logname (the "PROCESS DETAIL(D):"
#          heading goes to stdout only)
#######################################
psdr(){
  ## Scratch files; both start with a single empty line
  psdfile="$tempdir/psd.tmp"
  psrfile="$tempdir/psr.tmp"
  echo "" > "$psdfile"
  echo "" > "$psrfile"

  echo -e "\033[1;35m-----check process in D&&R status------\033[0m"
  sleep 2
  echo "###process in D&&R status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)"
  echo "time: $(date +%F_%H:%M:%S)" >> "$logname"
  sleep 2
  echo "" | tee -a "$logname"

  ## Collect D-state processes into $psdfile, followed by a "dnum:<count>"
  ## line. An entry is written only when start time, name and wchan are all
  ## non-empty.
  statd(){
    numd=0
    for pidd in $(ls /proc | grep "^[0-9]"); do
      [ "$pidd" -gt 0 ] 2>/dev/null || continue
      rund=$(cat /proc/$pidd/status 2>/dev/null | grep "disk sleep" | wc -l)
      [ "$rund" -gt 0 ] 2>/dev/null || continue
      runtimed=$(ps -A -o pid,stime | awk -v var1=$pidd '$1==var1{print $2}')
      tasknamed=$(cat /proc/$pidd/status 2>/dev/null | grep Name | awk -F" " '{print $2}')
      wchand=$(cat /proc/$pidd/wchan)
      if [[ -z "$wchand" || -z "$runtimed" || -z "$tasknamed" ]]; then
        continue
      fi
      numd=$((numd + 1))
      printf "%-10s %-20s %-15s %-15s\n" "$pidd" "$tasknamed" "$runtimed" "$wchand" >> "$psdfile"
    done
    echo "dnum:$numd" >> "$psdfile"
  }
  statd

  tonumd=$(grep "dnum" "$psdfile" | awk -F":" '{print $2}')
  {
    echo "---process in D state---"
    echo "D state number: $tonumd"
    if [ "$tonumd" = 0 ]; then
      echo "detection result: not exist"
    else
      echo "detection result: !!exist!!"
    fi
    echo ""
  } | tee -a "$logname"

  if [ "$tonumd" -gt 0 ]; then
    echo "PROCESS DETAIL(D):"
    {
      printf "%-10s %-20s %-15s %-15s\n" "PID" "NAME" "STIME" "WCHAN"
      cat "$psdfile" | grep -v "dnum" | sed '/^$/d'
    } | tee -a "$logname"
  fi
  echo "" | tee -a "$logname"
  sleep 2

  ## Collect R-state processes into $psrfile, followed by a "rnum:<count>"
  ## line; same completeness rule as for D state.
  statr(){
    rnum=0
    for pidr in $(ls /proc); do
      [ "$pidr" -gt 0 ] 2>/dev/null || continue
      runr=$(cat /proc/$pidr/status 2>/dev/null | grep "running" | wc -l)
      [ "$runr" -gt 0 ] 2>/dev/null || continue
      runtimer=$(ps -A -o pid,stime | awk -v var1=$pidr '$1==var1{print $2}')
      tasknamer="$(cat /proc/$pidr/status 2>/dev/null | grep Name | awk -F" " '{print $2}')"
      wchanr=$(cat /proc/$pidr/wchan)
      if [[ -n "$runtimer" && -n "$tasknamer" && -n "$wchanr" ]]; then
        rnum=$((rnum + 1))
        printf "%-10s %-20s %-15s %-15s\n" "$pidr" "$tasknamer" "$runtimer" "$wchanr" >> "$psrfile"
      fi
    done
    echo "rnum:$rnum" >> "$psrfile"
  }
  statr

  tonumr=$(grep "rnum" "$psrfile" | awk -F":" '{print $2}')
  {
    echo "---process in R state---"
    echo "R state number: $tonumr"
  } | tee -a "$logname"
  if [ "$tonumr" -gt 0 ]; then
    {
      echo ""
      echo "PROCESS DETAIL(R):"
      printf "%-10s %-20s %-15s %-15s\n" "PID" "NAME" "STIME" "WCHAN"
      cat "$psrfile" | grep -v "rnum" | sed '/^$/d'
    } | tee -a "$logname"
  fi
  echo "" | tee -a "$logname"
}

## Inode usage
#######################################
# Print the three file systems with the highest inode usage percentage,
# taken from "df -i".
# Globals read:    tempdir, logname
# Globals written: inodefile
# Outputs: report on stdout, mirrored into $logname
#######################################
ino(){
  local f_dev f_total f_used f_free used_pct mount_point f_rest

  echo -e "\033[1;35m------------check inode used-----------\033[0m"
  sleep 2
  echo "###inode used status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)"
  echo "time: $(date +%F_%H:%M:%S)" >> "$logname"
  sleep 2
  echo "" | tee -a "$logname"

  ## Scratch file: an empty line, then the df -i records after the header,
  ## ordered by column 5 (usage) descending, first three only.
  inodefile="$tempdir/ino.tmp"
  echo "" > "$inodefile"
  df -i | awk 'NR>2{print line}{line=$0} END{print line}' | sort -nr -k5 | head -3 >> "$inodefile"

  {
    echo "inode usage(top 3):"
    printf "%-20s %-10s\n" "MOUNT" "USED(%)"
  } | tee -a "$logname"

  ## One row per record: column 6 (mount point) and column 5 (usage)
  while read -r f_dev f_total f_used f_free used_pct mount_point f_rest; do
    # skip the empty first line and records without a sixth column
    [ -n "$mount_point" ] || continue
    printf "%-20s %-10s\n" $mount_point $used_pct | tee -a "$logname"
    sleep 1
  done < "$inodefile"

  echo "" | tee -a "$logname"
  sleep 2
}

## Directory sizes
## The three largest entries directly under / and, for each of them, its three
## largest direct children.
#######################################
# Globals read:    tempdir, logname, lonum, sinum, oldstasi
# Globals written: sizetmp cursiti difsiva sizechil sizet sizec sinum oldstasi
# Outputs: report on stdout, mirrored into $logname
# Returns: 1 when skipped because the previous scan (time stamp in oldstasi)
#          was less than 1800 seconds ago
#######################################
size(){
  local top_dir child_dir

  echo -e "\033[1;35m------check directory size status------\033[0m"
  echo "###directory size status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)"
  echo "time: $(date +%F_%H:%M:%S)" >> "$logname"
  sleep 2
  echo "" | tee -a "$logname"

  sizetmp="$tempdir/size.tmp"

  ## From the second round on, scan only if 30 minutes have passed
  if [ "$lonum" -gt 1 ]; then
    cursiti=$(date -d "$(date)" +%s)
    difsiva=$(printf "%.0f" $(echo "$cursiti-$oldstasi" | bc))
    if [ "$(echo "$difsiva>=1800" | bc)" = 0 ]; then
      {
        echo "directory size status check time interval less than 30min in last check"
        echo ""
      } | tee -a "$logname"
      return 1
    fi
  fi

  ## Three largest entries under /
  echo "" > "$sizetmp"
  du -sh /* 2>/dev/null | sort -rh -k1 | head -3 >> "$sizetmp"
  {
    echo "directory space usage(top 3,secondary directory):"
    echo "excute num: $sinum"
  } | tee -a "$logname"

  for top_dir in $(awk '{print $2}' "$sizetmp"); do
    ## the entry itself, then its three largest children
    sizechil=$(du -sh $top_dir/* | sort -rh -k1 | head -3)
    sizet=$(grep -w $top_dir "$sizetmp" | awk '{print $1}')
    printf "%-10s %-10s\n" "$top_dir" "$sizet" | tee -a "$logname"
    sleep 1
    for child_dir in $(echo "$sizechil" | awk '{print $2}'); do
      sizec=$(echo "$sizechil" | grep -w "$child_dir" | awk '{print $1}')
      printf "%-25s %-10s\n" " --- $child_dir" "$sizec" | tee -a "$logname"
    done
    echo "" | tee -a "$logname"
    sleep 1
  done

  ## Count this scan and remember when it finished
  sinum=$((sinum + 1))
  oldstasi=$(date -d "$(date)" +%s)
}

## System and process I/O
#######################################
# Print the system iowait figure, per-disk tps/await/svctm/util taken from
# iostat, and the per-process I/O averages reported by "pidstat -d 1 3".
# Globals read:    tempdir, logname
# Globals written: diskio proio diskname iow tpsio waitio ctmio utiio pionum
#                  pion iotime
# Outputs: report on stdout, mirrored into $logname
#######################################
iot(){
  local disk row_fmt_disk row_fmt_proc
  local p_tag p_uid p_pid p_read p_write p_ccwr p_name p_rest
  row_fmt_disk="%-10s %-10s %-10s %-10s %-10s\n"
  row_fmt_proc="%-15s %-10s %-10s %-10s %-10s %-10s\n"

  echo -e "\033[1;35m---------check system io status--------\033[0m"
  sleep 2
  echo "###system io status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)"
  echo "time: $(date +%F_%H:%M:%S)" >> "$logname"
  sleep 2
  echo "" | tee -a "$logname"

  ## Scratch files; both start with a single empty line
  diskio="$tempdir/diskio.tmp"
  proio="$tempdir/proio.tmp"
  echo '' > "$diskio"
  echo '' > "$proio"

  echo "---disk io status---" | tee -a "$logname"

  ## Disk names from lsblk; iowait is column 4 of the line that follows the
  ## "avg-cpu" header of iostat -x
  diskname=$(lsblk | grep '^[a-z]d[a-z]' | awk '{print $1}')
  iow=$(iostat -x | grep -A 1 'avg-cpu' | awk 'NR==2{print $4}')
  iostat -x >> "$diskio"
  {
    echo "system iowait(%): $iow"
    echo ""
  } | tee -a "$logname"
  sleep 2

  ## Per-disk table: tps from plain iostat (column 2); columns 10, 13 and 14
  ## of the saved iostat -x output are shown as await, svctm and util
  {
    echo "disk usage io in detail"
    printf "$row_fmt_disk" "DISK" "TPS" "AWAIT(ms)" "SVCTM(ms)" "UTIL(%)"
  } | tee -a "$logname"
  for disk in $diskname; do
    tpsio=$(iostat | grep -w "^$disk" | awk '{print $2}')
    read -r waitio ctmio utiio < <(grep -w "^$disk" "$diskio" | awk '{print $10, $13, $14}')
    printf "$row_fmt_disk" "$disk" "$tpsio" "$waitio" "$ctmio" "$utiio" | tee -a "$logname"
    sleep 1
  done
  sleep 1

  {
    echo ""
    echo "---process io status---"
  } | tee -a "$logname"

  ## Per-process table built from the "Average" lines of pidstat
  ## (column 3 pid, columns 4-6 read/write/cancelled write, column 7 name)
  pidstat -d 1 3 | grep -i 'Average' | grep -v 'UID' >> "$proio"
  pionum=$(sed '/^$/d' "$proio" | wc -l)
  if [ "$pionum" = 0 ]; then
    echo "process use io is not exist" | tee -a "$logname"
  else
    {
      echo "process io usage in detail(current):"
      printf "$row_fmt_proc" "NAME" "PID" "STIME" "IO_R(KB/S)" "IO_W(KB/S)" "IO_CCWR(KB/S)"
    } | tee -a "$logname"
    pion=0
    while read -r p_tag p_uid p_pid p_read p_write p_ccwr p_name p_rest; do
      # skip the empty first line and lines without a pid column
      [ -n "$p_pid" ] || continue
      iotime=$(ps -A -o pid,stime | awk -v a="$p_pid" '$1==a {print $2}')
      printf "$row_fmt_proc" "$p_name" "$p_pid" "$iotime" "$p_read" "$p_write" "$p_ccwr" | tee -a "$logname"
      pion=$((pion + 1))
    done < "$proio"

    echo "use io process num(current): $pion" | tee -a "$logname"
  fi
  echo "" | tee -a "$logname"
}


## File descriptors
#######################################
# Helper: print part/whole as a percentage with two decimals ("0.00" when
# whole is not a positive number, so there is never a division by zero).
# Arguments: $1 - part, $2 - whole
#######################################
_fde_pct() {
  awk -v part="$1" -v whole="$2" \
    'BEGIN { if (whole + 0 > 0) printf "%.2f", part / whole * 100; else printf "0.00" }'
}

#######################################
# Report the configured open-file limits, the number of lines "lsof -n"
# lists, and the ten processes with the most lsof entries.
# Globals read:    tempdir, logname, lonum, finum, fdotime
# Globals written: fdtmp fileto userfito sysps filenum fiperuse filepro
#                  fdotime finum
# Outputs: report on stdout and in $logname
# Returns: 1 when skipped because the previous scan (time stamp in fdotime)
#          was less than 1800 seconds ago
#######################################
fde(){
  local row_fmt="%-20s %-10s %-10s %-10s %-10s %-10s\n"
  local now f_pid f_count f_user f_stime f_name f_pct

  echo -e "\033[1;35m---------check system fd status--------\033[0m"
  sleep 2
  echo "###system fd status###" >> "$logname"
  echo "time: $(date +%F_%H:%M:%S)"
  echo "time: $(date +%F_%H:%M:%S)" >> "$logname"
  sleep 2
  echo "" | tee -a "$logname"

  ## From the second round on, scan only if 30 minutes have passed
  if [ "$lonum" -gt 1 ]; then
    now=$(date +%s)
    if [ $((now - ${fdotime:-0})) -lt 1800 ]; then
      {
        echo "system fd check time interval less than 30min in last check"
        echo ""
      } | tee -a "$logname"
      return 1
    fi
  fi

  echo "excute num: $finum"
  fdtmp="$tempdir/fdfile.tmp"
  echo '' > "$fdtmp"

  ## Configured limits: system-wide file-max and the per-user ulimit
  fileto=$(cat /proc/sys/fs/file-max)
  userfito=$(ulimit -n)
  echo "max open file number in file(system config): $fileto"
  echo "max open file number(user config): $userfito"
  echo ""
  echo "max open number in file(system config): $fileto" >> "$logname"
  echo "max open number in file(user config): $userfito" >> "$logname"
  echo "" >> "$logname"
  sleep 2

  ## One ps snapshot serves every name/user/start-time lookup below
  sysps=$(ps -A -o pid,user,stime,comm)

  ## Everything lsof lists, without its header line
  lsof -n | grep -v "^COMMAND.*PID" >> "$fdtmp"
  filenum=$(sed '/^$/d' "$fdtmp" | wc -l)

  fiperuse=$(_fde_pct "$filenum" "$fileto")
  {
    echo "file open number(current): $filenum"
    echo "file open used(%): ${fiperuse}%"
    echo ""
  } | tee -a "$logname"
  sleep 2

  ## Count the lsof lines per pid (column 2) and keep the ten largest.
  ## The list is cut to ten BEFORE the rows are printed, so the terminal and
  ## the log always receive the same ten rows and the loop runs in this shell
  ## rather than in a pipeline that is ended by SIGPIPE.
  filepro=$(sed '/^$/d' "$fdtmp" \
    | awk '{ A[$2]++ } END{for (B in A){ print B, A[B] } }' \
    | sort -nr -k2 | head -10)

  {
    echo "process open file number in Top 10:"
    printf "$row_fmt" "NAME" "USER" "PID" "STIME" "OPENNUM" "USED(%)"
  } | tee -a "$logname"

  while read -r f_pid f_count; do
    [ -n "$f_pid" ] || continue
    f_user=""
    f_stime=""
    f_name=""
    ## user, start time and command name of this pid from the snapshot
    read -r f_user f_stime f_name < <(
      printf '%s\n' "$sysps" | awk -v a="$f_pid" '$1==a {print $2, $3, $4; exit}'
    )
    f_pct=$(_fde_pct "$f_count" "$fileto")
    printf "$row_fmt" "$f_name" "$f_user" "$f_pid" "$f_stime" "$f_count" "$f_pct%" \
      | tee -a "$logname"
  done <<< "$filepro"

  echo "" | tee -a "$logname"

  ## Remember when this scan finished and count it, so "excute num" advances
  ## the same way it does for the swap and directory-size checks.
  fdotime=$(date +%s)
  finum=$((finum + 1))
}

## Run the checks requested on the command line, in the order given.
## Only the known check names are ever executed: the numeric interval/count
## arguments are skipped instead of being run as commands, and arguments in
## position 10 and later are handled correctly ("$10" would expand to "${1}0").
## "all" runs every check once.
for systools_arg in "$@"; do
  case "$systools_arg" in
    all)
      clo
      cuse
      muse
      swu
      psdr
      ino
      size
      iot
      fde
      ;;
    clo | cuse | muse | swu | psdr | ino | size | iot | fde)
      # stderr of an individually requested check is discarded
      "$systools_arg" 2>/dev/null
      ;;
  esac
done

echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++"
echo -e "\033[1;32mexecution completed number: $lonum\033[0m"
echo "------execution completed number: $lonum------" >> $logname
echo ""
echo "" >> $logname

## Wait for the configured interval, except after the final round
if [ -n "$lofre" ] && [ "$lofre" != "$lonum" ]; then
  sleep $extime
fi
lonum=$((lonum + 1))
done

## Number of rounds that were completed
finlonum=$((lonum - 1))

## Moment at which the run is considered finished (recorded in the toolbox log)
systools_finished_at=$(date +%F_%H:%M:%S)

#######################################
# EXIT handler for a run that completed all rounds: remove the temporary
# directory, record the completion in the toolbox log and print a summary.
# Each step runs only when the previous one succeeded.
# Globals read: tempdir, toolslog, logname, pid_lo, finlonum,
#               systools_finished_at
# NOTE(review): the script leaves with status 10 even after a complete run;
# confirm that callers expect this.
#######################################
systools_finish() {
  rm -rf $tempdir &&
    echo "Systools execution completed,time: $systools_finished_at (pid: $pid_lo)" >> $toolslog &&
    echo "number of runs: $finlonum (pid: $pid_lo)" >> $toolslog &&
    echo "systatus log location: $logname (pid: $pid_lo)" >> $toolslog &&
    echo "pid: $pid_lo number of runs in this time:" $finlonum &&
    echo 'systatus log file:' $logname &&
    exit 10
}
trap systools_finish EXIT

echo -e "\033[1;32m##############system status check finish##############\033[0m"

脚本下载:systatus

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值