获取linux的健康状态

脚本来自  http://linoxide.com/linux-shell-script/shell-script-check-linux-system-health/

略有修改


测试环境:ubuntu

脚本作用:用来获取 hostname、kernel version、uptime 以及 cpu / memory / disk usage;其中 CPU 统计依赖 mpstat,需要安装 sysstat

sudo apt-get install sysstat


脚本:

#!/bin/bash
# The shebang above has to be the very first line of the script; anywhere else
# it is just a comment and the kernel will not use it to pick the interpreter.
#
# Address that receives the report by e-mail. Set it to an empty string to only
# save the report file without sending anything.
EMAIL='alerts@account.com'
#We will create function to easily manage what to do with output.
#######################################
# Print a plain-text health report for this host: identity, per-CPU user time,
# load average, top processes, disk usage and memory.
#
# Only POSIX sh constructs are used, so the function behaves the same under
# bash and dash. All tab/newline formatting goes through printf/awk because
# the handling of "\t" by echo differs between shells.
#
# Globals:   none read or written
# Arguments: none
# Outputs:   the report on stdout; messages from missing tools go to stderr
# Requires:  hostname, uname, uptime, who, ps, top, df, free, awk, sed, sort;
#            mpstat (package sysstat) is optional - without it the per-CPU
#            lines are replaced by an installation hint.
# Returns:   exit status of the final awk invocation (normally 0)
#######################################
sysstat () {
  # ---- Banner and host identity ---------------------------------------------
  printf '%s\n' \
    '' \
    '' \
    '####################################################################' \
    '' \
    'Health Check Report (CPU,Process,Disk Usage, Memory)' \
    '' \
    '####################################################################' \
    '' \
    "Hostname : $(hostname)" \
    '' \
    "Kernel Version : $(uname -r)" \
    '' \
    "Uptime : $(uptime | sed 's/.*up \([^,]*\), .*/\1/')" \
    '' \
    "Last Reboot Time : $(who -b | awk '{print $3,$4}')" \
    '' \
    '*********************************************************************' \
    '' \
    'CPU Load - > Threshold < 1 Normal > 1 Caution , > 2 Unhealthy' \
    '' \
    '*********************************************************************' \
    '' \
    ''

  # ---- Per-CPU user time ----------------------------------------------------
  if command -v mpstat >/dev/null 2>&1; then
    printf '\n'
    # The position of the CPU and %usr columns depends on the time format
    # (a 12-hour clock adds an AM/PM column), so locate both from the header
    # row instead of hard-coding field numbers. Rows whose CPU column is not
    # a plain number (the "all" summary) are skipped.
    mpstat -P ALL | awk '
      !cpu_col {
        for (k = 1; k <= NF; k++) {
          if ($k == "CPU") cpu_col = k
          if ($k == "%usr" || $k == "%user") usr_col = k
        }
        next
      }
      usr_col && $cpu_col ~ /^[0-9]+$/ {
        printf "CPU%s : %s\n", $cpu_col, $usr_col
      }
    '
  else
    printf '%s\n' \
      'Please install mpstat!' \
      'On Debian based systems:' \
      'sudo apt-get install sysstat' \
      'On RHEL based systems:' \
      'yum install sysstat'
  fi

  # ---- Load average ---------------------------------------------------------
  # Does not depend on mpstat, so it is reported in every case. The 1-minute
  # value is taken from a single uptime call and reused for the verdict.
  load_avg=$(uptime | awk -F'load average:' '{ print $2 }' | cut -f1 -d,)
  load_health=$(printf '%s\n' "$load_avg" | awk '{
    if ($1 > 2) print "Unhealthy"
    else if ($1 > 1) print "Caution"
    else print "Normal"
  }')
  printf '%s\n' \
    '' \
    '' \
    "Load Average : $load_avg" \
    '' \
    "Heath Status : $load_health" \
    '' \
    ''

  # ---- Processes ------------------------------------------------------------
  printf '%s\n' \
    '' \
    '' \
    '******************************************************************' \
    '' \
    'Process' \
    '' \
    '******************************************************************' \
    '' \
    'Top memory using processs/application' \
    '' \
    'PID %MEM RSS COMMAND' \
    ''
  # Skip the header row of ps, then keep the ten largest resident set sizes.
  ps aux | awk 'NR > 1 { print $2, $4, $6, $11 }' | sort -k3,3nr | head -n 10

  printf '%s\n' \
    '' \
    'Top CPU using process/application' \
    ''
  # One batch-mode iteration of top; lines 7-17 are the column header plus the
  # first ten processes.
  top -b -n 1 | head -n 17 | tail -n 11

  # ---- Disk usage -----------------------------------------------------------
  printf '%s\n' \
    '' \
    '**********************************************************************' \
    '' \
    'Disk Usage - > Threshold < 90 Normal > 90% Caution > 95 Unhealthy' \
    '' \
    '**********************************************************************' \
    '' \
    ''
  # A single df call feeds both listings; rows are buffered inside awk, so no
  # temporary file is needed. The header is dropped by position (NR > 1)
  # because its wording depends on the locale.
  df -Pkh | awk '
    NR > 1 && NF >= 6 {
      n++
      dev[n] = $1; avail[n] = $4; used[n] = $5; mnt[n] = $6
    }
    END {
      for (k = 1; k <= n; k++)
        printf "%s %s %s used %s free space\n\n", dev[k], mnt[k], used[k], avail[k]

      printf "\n\nHeath Status\n\n"

      for (k = 1; k <= n; k++) {
        pct = used[k]
        sub(/%.*$/, "", pct)
        pct += 0                      # non-numeric values such as "-" count as 0
        if (pct >= 95)      verdict = "Unhealthy"
        else if (pct >= 90) verdict = "Caution"
        else                verdict = "Normal"
        printf "%s %s \t\t %s\n\n", dev[k], mnt[k], verdict
      }
    }
  '

  # ---- Memory ---------------------------------------------------------------
  # Line 2 of "free -m" is physical memory and the last line is swap (older
  # procps prints an extra "-/+ buffers/cache" line in between). Values are in
  # MiB; they are shown as GiB truncated to two decimals. The percentage is
  # free/total, reported as N/A when the total is 0 (e.g. a host without swap)
  # instead of dividing by zero.
  free -m | awk '
    function gb(mb) {
      return sprintf("%.2fGB", int(mb * 100 / 1024) / 100)
    }
    function pct_free(unused, total) {
      if (total + 0 > 0) return int(unused * 100 / total) "%"
      return "N/A"
    }
    function row(total, used, unused) {
      return gb(total) "\t" gb(used) "\t" gb(unused) "\t" pct_free(unused, total)
    }
    NR == 2 { mem = row($2, $3, $4) }
    NR > 2  { swap = row($2, $3, $4) }
    END {
      if (mem == "")  mem = row(0, 0, 0)
      if (swap == "") swap = row(0, 0, 0)
      print ""
      print ""
      print "********************************************************************"
      print ""
      print "Memory"
      print ""
      print "********************************************************************"
      print ""
      print "Physical Memory"
      print ""
      print "Total\tUsed\tFree\t%Free"
      print ""
      print mem
      print ""
      print "Swap Memory"
      print ""
      print "Total\tUsed\tFree\t%Free"
      print ""
      print swap
      print ""
    }
  '
}


# Report file name: health-<hostname>-<yymmdd>-<HHMM>.txt. The timestamp comes
# from a single date call so the date and time parts cannot disagree when the
# clock rolls over between two invocations.
FILENAME="health-$(hostname)-$(date +%y%m%d-%H%M).txt"

# Run the report and save it under the generated name (quoted, so a host name
# with unusual characters cannot split the redirection target).
sysstat > "$FILENAME"

# Tell the user where the report went; RESULT carries any extra notice that
# sysstat may have recorded and is empty otherwise.
echo "Reported file $FILENAME generated in current directory.$RESULT"

# Mail the report only when an address is configured.
if [ -n "$EMAIL" ]; then
  # command -v is the portable way to test for a program; which is an external
  # tool whose exit status is not reliable everywhere.
  if command -v mail >/dev/null 2>&1; then
    mail -s "$FILENAME" "$EMAIL" < "$FILENAME"
  else
    echo "The program 'mail' is currently not installed."
  fi
fi


执行结果:

ubuntu@localhost:~$ bash LinuxHealthCheck.sh 
Reported file health-localhost-161120-0855.txt generated in current directory.
postdrop: warning: unable to look up public/pickup: No such file or directory  ====本机邮件服务(postfix)未正确配置或未启动导致邮件发送失败,暂不处理


文件中的内容:

Reported file health-localhost-161120-0821.txt generated in current directory.
postdrop: warning: unable to look up public/pickup: No such file or directory
ubuntu@localhost:~$ cat health-localhost-161120-0821.txt 




####################################################################


Health Check Report (CPU,Process,Disk Usage, Memory)


####################################################################


#hostname command returns hostname
Hostname : localhost


#uname command with key -r returns Kernel version
Kernel Version : 2.6.32-042stab113.21


#uptime command used to get uptime, and with sed command we cat process output to get only uptime.
Uptime : 139 days


#who command is used to get last reboot time, awk for processing output
Last Reboot Time : Jul 3


*********************************************************************


CPU Load - > Threshold < 1 Normal > 1 Caution , > 2 Unhealthy


*********************************************************************




Please install mpstat!
On Debian based systems:
sudo apt-get install sysstat
On RHEL based systems:
yum install sysstat




******************************************************************


Process


******************************************************************


Top memory using processs/application


PID %MEM RSS COMMAND


#with ps command we get list of processes,  awk show only needed columns. After with sort command we sort it by third column and we need only top 10, that why we used head command
7742 1.5 4108 sshd:
324 0.9 2388 /usr/bin/python
7754 0.8 2324 -bash
7753 0.7 1964 sshd:
64 0.6 1692 /lib/systemd/systemd-journald
332 0.6 1644 /usr/bin/python3
25308 0.5 1384 awk
296 0.5 1312 /usr/sbin/openvpn
25307 0.4 1160 ps
372 0.3 812 ss-server


Top CPU using process/application


#with top command we get top CPU using processes, and with combination of head and tail we get top 10.
  PID USER      PR  NI    VIRT    RES    SHR S %CPU %MEM     TIME+
    1 root      20   0   36928    704    536 S  0.0  0.3   1:59.82
    2 root      20   0       0      0      0 S  0.0  0.0   0:00.00
    3 root      20   0       0      0      0 S  0.0  0.0   0:00.00
   64 root      20   0   31104   1692   1584 S  0.0  0.6   0:22.97
   66 root      20   0   41464    336    284 S  0.0  0.1   0:13.80
  293 systemd+  20   0   26520    528    448 S  0.0  0.2   3:14.20
  296 root      20   0   31336   1312    924 S  0.0  0.5   1:55.78
  299 root      20   0   69880    624    512 S  0.0  0.2   0:02.60
  324 root      20   0   56732   2388    868 S  0.0  0.9  28:10.30
  332 root      20   0  292564   1644    716 S  0.0  0.6  16:14.72


**********************************************************************


Disk Usage - > Threshold < 90 Normal > 90% Caution > 95 Unhealthy


**********************************************************************




/dev/simfs / 11% used 11G free space


devtmpfs /dev 0% used 128M free space


tmpfs /dev/shm 0% used 128M free space


tmpfs /run 13% used 112M free space


tmpfs /run/lock 0% used 5.0M free space


tmpfs /sys/fs/cgroup 0% used 128M free space


none /run/shm 0% used 128M free space






Heath Status


/dev/simfs /             Normal


devtmpfs /dev            Normal


tmpfs /dev/shm           Normal


tmpfs /run               Normal


tmpfs /run/lock                  Normal


tmpfs /sys/fs/cgroup             Normal


none /run/shm            Normal






********************************************************************


Memory


********************************************************************


Physical Memory


Total   Used    Free    %Free


# as we get values in GB, also we get % of usage dividing Free by Total
0.25GB  0.15GB  0.08GB  35%


Swap Memory


Total   Used    Free    %Free


#Same as above – values in GB, and in same way we get % of usage
0.12GB  0.03GB  0.09GB  73%

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值