巡检是大多数 SA(系统管理员)的日常工作之一,但这种活儿枯燥乏味,想来大多数 SA 都提不起兴趣。这时候脚本就派上用场了,跑起来真是多快好省。下面分享一个简单的 AIX 巡检脚本。



巡检项:


1. 文件系统使用率的监控(不能超过85%)

2. 交换空间的监控(不能超过70%)

3. errdemon进程运行是否正常

4. 检查系统报错

5. 检查rootvg中逻辑卷是否同步

6. 检查适配卡状态




来看一下脚本。这个脚本会在每台主机上以普通用户身份通过 Crontab 定时执行,生成一个日志文件,然后调用上传工具把日志传到指定主机,方便集中检查。整个过程全自动,无需人工操作。


#! /usr/bin/ksh
# Version 1.0
# AUTHOR:       Xin23   http://weibo.com/231988
# NextLine
# Append a separator line (a single space) to today's summary log,
# <YYYY_MM_DD>_<hostname>_Total.log, in the current working directory.
# The file name is quoted so that unusual characters in the output of
# `hostname` cannot split the redirect target into several words.
NextLine()
{
        echo ' ' >> "$(date +%Y_%m_%d)_$(hostname)_Total.log"
}
# EchoTitle [title]
# Append a section banner followed by a separator line to today's summary
# log, <YYYY_MM_DD>_<hostname>_Total.log, in the current working directory.
# Arguments: $1 - optional banner text; when omitted or empty the global
#                 TITLE is used, which is how existing callers pass it.
# printf '%s' is used instead of echo so backslashes in the title are written
# literally, and the file name is quoted so it is always a single word.
EchoTitle()
{
        printf '%s\n' "-------------------${1:-$TITLE}-------------------" >> "$(date +%Y_%m_%d)_$(hostname)_Total.log"
        NextLine
}
#Part 1: run every check and record anomalies in one log per check under /tmp
# Build the common file-name prefix once so all per-check logs of this run
# share the same date and host name.
CHECK_PREFIX="/tmp/$(date +%Y_%m_%d)_$(hostname)"
# Every log below is truncated (">") rather than appended to: the summary
# step only tests whether a log is empty, so leftovers from an earlier run on
# the same day would otherwise hide a change (for example a dead errdemon
# would still be reported as ok).

# File systems: keep device name and field 4 (the usage percentage) of every
# row above 85%, highest first. "+ 0" forces a numeric comparison, so the
# header row and "-" placeholders evaluate to 0 and are dropped.
df -k | awk '{ split($4, pct, "%"); if (pct[1] + 0 > 85) printf "%-33s%8s\n", $1, $4 }' | sort -nrk 2 > "${CHECK_PREFIX}_FileSystem.log"

# errdemon: the log is non-empty only while an errdemon process is listed.
ps -ef | grep errdemon | grep -v grep > "${CHECK_PREFIX}_ErrDemon.log"

# System error report: any errpt output counts as an anomaly.
errpt > "${CHECK_PREFIX}_Error.log"

# rootvg logical volumes: skip the two header lines and keep every volume
# whose state does not contain "syncd".
lsvg -l rootvg | tail -n +3 | grep -v syncd > "${CHECK_PREFIX}_Disk.log"

# Paging space: skip the header row explicitly and keep name, size and usage
# of every paging space whose field 5 (usage) is above 70.
lsps -a | awk 'NR > 1 && $5 + 0 > 70 {printf "%-13s%8s%8s\n",$1,$4,$5}' > "${CHECK_PREFIX}_Paging.log"

# Adapters: keep every adapter line that is not marked Available.
lsdev -Cc adapter | grep -v Available > "${CHECK_PREFIX}_Adapter.log"
#Part 2: build the summary report from the per-check logs in /tmp
# Stop if /tmp cannot be entered; otherwise the report would be assembled in
# whatever directory the script happened to start in.
cd /tmp || exit 1
# Resolve the file-name prefix once so every section reads and writes files
# with the same date and host name.
REPORT_PREFIX="$(date +%Y_%m_%d)_$(hostname)"
REPORT_TOTAL="${REPORT_PREFIX}_Total.log"

# ReportPresence NAME OK_MSG WARN_MSG
# For checks whose log is non-empty when things are healthy: write the NAME
# banner, then OK_MSG if <prefix>_NAME.log has content, otherwise WARN_MSG.
ReportPresence()
{
        TITLE=$1
        EchoTitle
        if [ -s "${REPORT_PREFIX}_$1.log" ]
        then printf '%s\n' "$2" >> "$REPORT_TOTAL"
        else printf '%s\n' "$3" >> "$REPORT_TOTAL"
        fi
        NextLine
}

# ReportAnomaly NAME WARN_MSG OK_MSG [spaced]
# For checks whose log lists problems: write the NAME banner, then WARN_MSG
# plus the content of <prefix>_NAME.log if it has content, otherwise OK_MSG.
# Passing "spaced" as $4 inserts a separator line before the log content.
ReportAnomaly()
{
        TITLE=$1
        EchoTitle
        if [ -s "${REPORT_PREFIX}_$1.log" ]
        then
                printf '%s\n' "$2" >> "$REPORT_TOTAL"
                if [ "$4" = spaced ]
                then NextLine
                fi
                cat "${REPORT_PREFIX}_$1.log" >> "$REPORT_TOTAL"
        else
                printf '%s\n' "$3" >> "$REPORT_TOTAL"
        fi
        NextLine
}

# NOTE(review): nothing in this script creates <prefix>_HACMP.log, so unless
# another job writes it this section always reports a warning - confirm where
# the HACMP log is supposed to come from.
ReportPresence HACMP 'HACMP SERVICE IP IS OK!' 'Warning: HACMP FAILED!'
ReportAnomaly FileSystem 'Warning: FileSystem has an error!' 'FileSystem is ok'
ReportPresence ErrDemon 'ErrDemon is ok' 'Warning: ErrDemon was Stoped!'
ReportAnomaly Error 'Warning: There is an error in system!' 'No Error in System!' spaced
ReportAnomaly Disk 'Warning: There is something wrong with disk!' 'Disk State is ok'
#cat $(date +%Y_%m_%d)_$(hostname)_Mem.log >> $(date +%Y_%m_%d)_$(hostname)_Total.log;
ReportAnomaly Paging 'Warning: There is something wrong with paging space' 'Paging space is ok'
ReportAnomaly Adapter 'Warning: Adapter error' 'Adapter is ok'

# Always finish with a formatted listing of the first seven df -k columns.
TITLE='FileSystem Status'
EchoTitle
df -k | awk '{printf "%-22s%12s%10s%6s%10s%7s %s\n", $1,$2,$3,$4,$5,$6,$7}' >> "$REPORT_TOTAL"
NextLine
#Part 3: hand the summary log to the upload helper
DATE=$(date +%Y_%m_%d)
HOSTNAME=$(hostname)
# Replace scp.sh with its absolute path when deploying.
# The arguments are quoted so each reaches scp.sh as exactly one word, and a
# failed upload is reported on stderr instead of passing silently.
scp.sh "$DATE" "$HOSTNAME" || echo "scp.sh failed for ${DATE}_${HOSTNAME}_Total.log" >&2



再来看一下上传工具scp.sh


#!/usr/bin/expect
# Version 1.0
# AUTHOR:       Xin23   http://weibo.com/231988
#
# Copy /tmp/<DATE>_<HOSTNAME>_Total.log to $USER@$IP:/tmp/log with scp,
# answering the host-key and password prompts automatically.
# Usage: scp.sh DATE HOSTNAME
# Exit status: scp's own status, 1 on timeout or rejected password,
#              2 on wrong usage.
#
# NOTE(review): this file holds a clear-text password; restrict its
# permissions, or switch to key-based SSH authentication.

if {[llength $argv] < 2} {
    puts stderr "usage: scp.sh DATE HOSTNAME"
    exit 2
}

# Seconds to wait for the next prompt or for scp to finish. It must cover
# the SSH handshake and the transfer itself; raise it for slow links.
set timeout 30
set IP xxx
set USER xxx
set PASSWORD xxx
set DATE [lindex $argv 0]
set HOSTNAME [lindex $argv 1]

spawn scp /tmp/${DATE}_${HOSTNAME}_Total.log $USER@$IP:/tmp/log

# Answer prompts in whatever order they arrive, then keep waiting until scp
# closes the connection (eof). Exiting earlier would tear down the session
# while the copy may still be running.
set password_sent 0
expect {
    "(yes/no" {
        # Unknown host key: accept it. The pattern is left open-ended so it
        # also matches prompts that list further choices after "no".
        send "yes\r"
        exp_continue
    }
    "assword:" {
        # A second password prompt means the first attempt was rejected.
        if {$password_sent} {
            puts stderr "scp.sh: password rejected by $IP"
            exit 1
        }
        set password_sent 1
        send "$PASSWORD\r"
        exp_continue
    }
    timeout {
        puts stderr "scp.sh: timed out talking to $IP"
        exit 1
    }
    eof
}

# Propagate scp's exit status (fourth element of the list returned by wait).
set result [wait]
exit [lindex $result 3]




来看一下效果:


被上传的巡检日志:


164703449.jpg


巡检:


只需 grep Warning 即可:只要有异常,日志里就会记录关键字 Warning,这样就一目了然了。


$ grep Warning *

$


一切正常




The End