check_iostat
内容如下:
#!/bin/bash
#
# Version 0.0.1 20160908
#
# by curious
#
# Monitoring plugin: checks the I/O statistics of one disk with iostat, prints
# an OK/WARNING/CRITICAL line and exits with the matching status.
# bash is required because the script relies on bash-only constructs such as
# `[[ ]]`.
#
# Locate the external tools with the `command -v` builtin rather than `which`,
# which is a separate program that may not be installed. Each variable is left
# empty when the tool cannot be found.
iostat=$(command -v iostat 2>/dev/null)
bc=$(command -v bc 2>/dev/null)
# Print the usage text on stdout and terminate the plugin.
# Exits with status 3 (UNKNOWN in the Nagios plugin convention); a negative
# status such as -1 is outside the valid 0-255 range and is rejected by some
# shells.
help() {
  # %b expands the \n and \t escapes in the text, so the output does not
  # depend on echo's non-portable -e flag.
  printf '%b\n' "\n\tThis plugin shows the I/O usage of the specified disk, using the iostat external program.\n\tIt prints three statistics:\n\t (await) 每一个IO请求的处理时间<5ms正常\n\t (svctm) 表示平均每次设备I/O操作的服务时间(以毫秒为单位)。如果svctm的值与await很接近,表示几乎没有I/O等待,磁盘性能很好。如果await的值远高于svctm的值,则表示I/O队列等待太长\n\t (until) 当值大于80且await<5ms时,表示磁盘正常但很繁忙\n\t$0:\n\t-d <disk>\t\tDevice to be checked (without the full path, eg. sda)\n\t-c <await>,<svctm>,<until>\tSets the CRITICAL level for await, svctm and until, respectively\n\t-w <await>,<svctm>,<until>\tSets the WARNING level for await, svctm and until, respectively\n"
  exit 3
}
# Ensuring we have the needed tools:
# An empty or non-executable path means the tool is missing. The variables are
# quoted so that an empty value is still tested, and the check runs in the
# current shell (not a subshell) so that `exit` really stops the plugin.
# Status 3 is UNKNOWN in the Nagios plugin convention.
if [ ! -x "$iostat" ] || [ ! -x "$bc" ]; then
  echo "ERROR: You must have iostat and bc installed in order to run this plugin"
  exit 3
fi
# Getting parameters:
#   -d <disk>                   device name, stored in $disk
#   -w <await>,<svctm>,<until>  warning levels, stored in $warning
#   -c <await>,<svctm>,<until>  critical levels, stored in $critical
#   -h                          print the usage text and exit
while getopts "d:w:c:h" OPT; do
  case "$OPT" in
    d) disk=$OPTARG ;;
    w) warning=$OPTARG ;;
    c) critical=$OPTARG ;;
    h) help ;;
    # getopts reports an unknown option or a missing argument as "?";
    # show the usage text instead of silently ignoring it.
    *) help ;;
  esac
done
# Adjusting the three warn and crit levels:

# split_levels <var1> <var2> <var3> <triple>
# Splits "<a>,<b>,<c>" on commas and stores the parts in the three named
# variables. A missing part is stored as an empty string (`cut -fN` would echo
# the whole input back when it contains no comma, hiding incomplete input);
# anything after the third part is discarded.
split_levels() {
  IFS=, read -r "$1" "$2" "$3" _ <<EOF
$4
EOF
}

split_levels crit_await crit_svctm crit_until "$critical"
split_levels warn_await warn_svctm warn_until "$warning"
# Checking parameters:
# Each failed check prints an error and calls help, which prints the usage
# text and exits.

# The device must exist as a block device under /dev.
if [ ! -b "/dev/$disk" ]; then
  echo "ERROR: Device incorrectly specified"
  help
fi

# All six thresholds are mandatory.
if [ -z "$warn_await" ] || [ -z "$warn_svctm" ] || [ -z "$warn_until" ] ||
  [ -z "$crit_await" ] || [ -z "$crit_svctm" ] || [ -z "$crit_until" ]; then
  echo "ERROR: You must specify all warning and critical levels"
  help
fi

# Thresholds must be plain non-negative numbers; decimals are allowed.
# Validating them first keeps arbitrary text out of the comparison below.
level_re='^[0-9]+([.][0-9]+)?$'
for level in "$warn_await" "$warn_svctm" "$warn_until" \
  "$crit_await" "$crit_svctm" "$crit_until"; do
  if [[ ! $level =~ $level_re ]]; then
    echo "ERROR: warning and critical levels must be numeric"
    help
  fi
done

# Compare with awk because `[[ a -ge b ]]` only handles integers and fails on
# values such as 1.5. awk exits 0 when any warning level >= its critical level.
if awk -v wa="$warn_await" -v ca="$crit_await" \
  -v ws="$warn_svctm" -v cs="$crit_svctm" \
  -v wu="$warn_until" -v cu="$crit_until" \
  'BEGIN { exit !(wa + 0 >= ca + 0 || ws + 0 >= cs + 0 || wu + 0 >= cu + 0) }'; then
  echo "ERROR: critical levels must be higher than warning levels"
  help
fi
# Doing the actual check:
# The first iostat reading is always rather large and inaccurate, so three
# reports are requested and the values of the last one are used.

# parse_iostat_extended <device>
# Reads `iostat -x` output on stdin and prints "<await> <svctm> <%util>" from
# the last line whose first field is exactly <device>. Columns are located by
# name through the "Device" header line, because their positions differ
# between sysstat releases; fields 10, 11 and 12 are used for any column the
# header does not name. Prints nothing when the device never appears.
parse_iostat_extended() {
  awk -v dev="$1" '
    function column(name, fallback) {
      return (name in col) ? col[name] : fallback
    }
    $1 ~ /^Device/ {
      split("", col)
      for (i = 1; i <= NF; i++) col[$i] = i
      next
    }
    $1 == dev {
      a = $(column("await", 10))
      s = $(column("svctm", 11))
      u = $(column("%util", 12))
      found = 1
    }
    END { if (found) print a, s, u }
  '
}

# A single iostat run feeds all three metrics so they come from the same
# report. LC_ALL=C keeps "." as the decimal separator for the later bc
# comparison. The variables are left empty when iostat yields nothing.
read -r await svctm until <<EOF
$(LC_ALL=C "$iostat" "$disk" -k -x 1 3 | parse_iostat_extended "$disk")
EOF
# Comparing the result and setting the correct level:

# at_least <value> <threshold>
# Succeeds when value >= threshold. bc does the comparison because the values
# may be decimals.
at_least() {
  [ "$(echo "$1 >= $2" | bc)" = "1" ]
}

number_re='^[0-9]+([.][0-9]+)?$'
if [[ ! $await =~ $number_re || ! $svctm =~ $number_re || ! $until =~ $number_re ]]; then
  # No usable figures were collected. Without this guard every bc comparison
  # would fail and the plugin would report OK for a disk it never measured.
  msg="UNKNOWN"
  status=3
elif at_least "$await" "$crit_await" || at_least "$svctm" "$crit_svctm" ||
  at_least "$until" "$crit_until"; then
  msg="CRITICAL"
  status=2
elif at_least "$await" "$warn_await" || at_least "$svctm" "$warn_svctm" ||
  at_least "$until" "$warn_until"; then
  msg="WARNING"
  status=1
else
  msg="OK"
  status=0
fi
# Printing the results:
# Human-readable summary, then "|" followed by the performance data as
# 'label'=value;warn;crit. Every label is wrapped in a matching pair of single
# quotes so that consumers can parse the svctm entry.
echo "$msg - I/O stats await=$await svctm=$svctm until=$until | 'await'=$await;${warn_await};${crit_await}; 'svctm'=$svctm;${warn_svctm};${crit_svctm} 'until'=$until;${warn_until};${crit_until}"
# Bye!
# Fall back to 3 (UNKNOWN) if no status was ever set.
exit "${status:-3}"
使用方法:
./check_iostat -d sda -w 100,100,80 -c 200,200,95
-w 后面的三个数值分别代表 await、svctm、until(即 iostat 的 %util)的告警值
-c 后面的三个数值分别代表 await、svctm、until(即 iostat 的 %util)的紧急值
内容如下:
#!/bin/bash
#
# Version 0.0.1 20160908
#
# by curious
#
# Monitoring plugin: checks the I/O statistics of one disk with iostat, prints
# an OK/WARNING/CRITICAL line and exits with the matching status.
# bash is required because the script relies on bash-only constructs such as
# `[[ ]]`.
#
# Locate the external tools with the `command -v` builtin rather than `which`,
# which is a separate program that may not be installed. Each variable is left
# empty when the tool cannot be found.
iostat=$(command -v iostat 2>/dev/null)
bc=$(command -v bc 2>/dev/null)
# Print the usage text on stdout and terminate the plugin.
# Exits with status 3 (UNKNOWN in the Nagios plugin convention); a negative
# status such as -1 is outside the valid 0-255 range and is rejected by some
# shells.
help() {
  # %b expands the \n and \t escapes in the text, so the output does not
  # depend on echo's non-portable -e flag.
  printf '%b\n' "\n\tThis plugin shows the I/O usage of the specified disk, using the iostat external program.\n\tIt prints three statistics:\n\t (await) 每一个IO请求的处理时间<5ms正常\n\t (svctm) 表示平均每次设备I/O操作的服务时间(以毫秒为单位)。如果svctm的值与await很接近,表示几乎没有I/O等待,磁盘性能很好。如果await的值远高于svctm的值,则表示I/O队列等待太长\n\t (until) 当值大于80且await<5ms时,表示磁盘正常但很繁忙\n\t$0:\n\t-d <disk>\t\tDevice to be checked (without the full path, eg. sda)\n\t-c <await>,<svctm>,<until>\tSets the CRITICAL level for await, svctm and until, respectively\n\t-w <await>,<svctm>,<until>\tSets the WARNING level for await, svctm and until, respectively\n"
  exit 3
}
# Ensuring we have the needed tools:
# An empty or non-executable path means the tool is missing. The variables are
# quoted so that an empty value is still tested, and the check runs in the
# current shell (not a subshell) so that `exit` really stops the plugin.
# Status 3 is UNKNOWN in the Nagios plugin convention.
if [ ! -x "$iostat" ] || [ ! -x "$bc" ]; then
  echo "ERROR: You must have iostat and bc installed in order to run this plugin"
  exit 3
fi
# Getting parameters:
#   -d <disk>                   device name, stored in $disk
#   -w <await>,<svctm>,<until>  warning levels, stored in $warning
#   -c <await>,<svctm>,<until>  critical levels, stored in $critical
#   -h                          print the usage text and exit
while getopts "d:w:c:h" OPT; do
  case "$OPT" in
    d) disk=$OPTARG ;;
    w) warning=$OPTARG ;;
    c) critical=$OPTARG ;;
    h) help ;;
    # getopts reports an unknown option or a missing argument as "?";
    # show the usage text instead of silently ignoring it.
    *) help ;;
  esac
done
# Adjusting the three warn and crit levels:

# split_levels <var1> <var2> <var3> <triple>
# Splits "<a>,<b>,<c>" on commas and stores the parts in the three named
# variables. A missing part is stored as an empty string (`cut -fN` would echo
# the whole input back when it contains no comma, hiding incomplete input);
# anything after the third part is discarded.
split_levels() {
  IFS=, read -r "$1" "$2" "$3" _ <<EOF
$4
EOF
}

split_levels crit_await crit_svctm crit_until "$critical"
split_levels warn_await warn_svctm warn_until "$warning"
# Checking parameters:
# Each failed check prints an error and calls help, which prints the usage
# text and exits.

# The device must exist as a block device under /dev.
if [ ! -b "/dev/$disk" ]; then
  echo "ERROR: Device incorrectly specified"
  help
fi

# All six thresholds are mandatory.
if [ -z "$warn_await" ] || [ -z "$warn_svctm" ] || [ -z "$warn_until" ] ||
  [ -z "$crit_await" ] || [ -z "$crit_svctm" ] || [ -z "$crit_until" ]; then
  echo "ERROR: You must specify all warning and critical levels"
  help
fi

# Thresholds must be plain non-negative numbers; decimals are allowed.
# Validating them first keeps arbitrary text out of the comparison below.
level_re='^[0-9]+([.][0-9]+)?$'
for level in "$warn_await" "$warn_svctm" "$warn_until" \
  "$crit_await" "$crit_svctm" "$crit_until"; do
  if [[ ! $level =~ $level_re ]]; then
    echo "ERROR: warning and critical levels must be numeric"
    help
  fi
done

# Compare with awk because `[[ a -ge b ]]` only handles integers and fails on
# values such as 1.5. awk exits 0 when any warning level >= its critical level.
if awk -v wa="$warn_await" -v ca="$crit_await" \
  -v ws="$warn_svctm" -v cs="$crit_svctm" \
  -v wu="$warn_until" -v cu="$crit_until" \
  'BEGIN { exit !(wa + 0 >= ca + 0 || ws + 0 >= cs + 0 || wu + 0 >= cu + 0) }'; then
  echo "ERROR: critical levels must be higher than warning levels"
  help
fi
# Doing the actual check:
# The first iostat reading is always rather large and inaccurate, so three
# reports are requested and the values of the last one are used.

# parse_iostat_extended <device>
# Reads `iostat -x` output on stdin and prints "<await> <svctm> <%util>" from
# the last line whose first field is exactly <device>. Columns are located by
# name through the "Device" header line, because their positions differ
# between sysstat releases; fields 10, 11 and 12 are used for any column the
# header does not name. Prints nothing when the device never appears.
parse_iostat_extended() {
  awk -v dev="$1" '
    function column(name, fallback) {
      return (name in col) ? col[name] : fallback
    }
    $1 ~ /^Device/ {
      split("", col)
      for (i = 1; i <= NF; i++) col[$i] = i
      next
    }
    $1 == dev {
      a = $(column("await", 10))
      s = $(column("svctm", 11))
      u = $(column("%util", 12))
      found = 1
    }
    END { if (found) print a, s, u }
  '
}

# A single iostat run feeds all three metrics so they come from the same
# report. LC_ALL=C keeps "." as the decimal separator for the later bc
# comparison. The variables are left empty when iostat yields nothing.
read -r await svctm until <<EOF
$(LC_ALL=C "$iostat" "$disk" -k -x 1 3 | parse_iostat_extended "$disk")
EOF
# Comparing the result and setting the correct level:

# at_least <value> <threshold>
# Succeeds when value >= threshold. bc does the comparison because the values
# may be decimals.
at_least() {
  [ "$(echo "$1 >= $2" | bc)" = "1" ]
}

number_re='^[0-9]+([.][0-9]+)?$'
if [[ ! $await =~ $number_re || ! $svctm =~ $number_re || ! $until =~ $number_re ]]; then
  # No usable figures were collected. Without this guard every bc comparison
  # would fail and the plugin would report OK for a disk it never measured.
  msg="UNKNOWN"
  status=3
elif at_least "$await" "$crit_await" || at_least "$svctm" "$crit_svctm" ||
  at_least "$until" "$crit_until"; then
  msg="CRITICAL"
  status=2
elif at_least "$await" "$warn_await" || at_least "$svctm" "$warn_svctm" ||
  at_least "$until" "$warn_until"; then
  msg="WARNING"
  status=1
else
  msg="OK"
  status=0
fi
# Printing the results:
# Human-readable summary, then "|" followed by the performance data as
# 'label'=value;warn;crit. Every label is wrapped in a matching pair of single
# quotes so that consumers can parse the svctm entry.
echo "$msg - I/O stats await=$await svctm=$svctm until=$until | 'await'=$await;${warn_await};${crit_await}; 'svctm'=$svctm;${warn_svctm};${crit_svctm} 'until'=$until;${warn_until};${crit_until}"
# Bye!
# Fall back to 3 (UNKNOWN) if no status was ever set.
exit "${status:-3}"
使用方法:
./check_iostat -d sda -w 100,100,80 -c 200,200,95
-w 后面的三个数值分别代表 await、svctm、until(即 iostat 的 %util)的告警值
-c 后面的三个数值分别代表 await、svctm、until(即 iostat 的 %util)的紧急值