一、前言
本文介绍如何通过 shell 脚本监控磁盘、内存和 CPU 的使用情况。
二、脚本
#!/bin/bash
#######################################
# Print a message wrapped in an ANSI colour that reflects its severity.
# Arguments:
#   $1   - level: INFO (green), WARN (yellow, prefixed "[提醒] ") or
#          ERROR (red, prefixed "[重要] "). Only the first
#          whitespace-delimited word of $1 is the level; any further words
#          in $1 are kept as the beginning of the message.
#   $2.. - message text, joined with single spaces.
# Outputs:
#   The coloured message on stdout. Backslash escapes inside the text are
#   expanded, exactly as the previous `echo -e` did. An unknown level
#   prints a red "bad argument" notice instead of the message.
#######################################
m_c () {
  # Pin IFS so that `read` splitting and "$*" joining do not depend on the
  # caller's environment.
  local IFS=$' \t\n'
  local mark rest message

  read -r mark rest <<< "${1-}"
  (( $# > 0 )) && shift
  # Words that followed the level inside $1 belong to the message.
  [[ -n ${rest} ]] && set -- "${rest}" "$@"

  case "${mark}" in
    INFO)
      message="\033[32m$*\033[0m"
      ;;
    WARN)
      message="\033[33m[提醒] $*\033[0m"
      ;;
    ERROR)
      message="\033[31m[重要] $*\033[0m"
      ;;
    *)
      # Same escape-sequence colouring as the other branches, so this path
      # no longer needs tput or a valid TERM.
      message="\033[31m参数[${mark}]输入错误, 请检查\033[0m"
      ;;
  esac

  # Quoted on purpose: an unquoted expansion would collapse whitespace and
  # expose the message to pathname expansion.
  printf '%b\n' "${message}"
}
############################################################################################################
# check disk
#######################################
# Helper for check_disk: turn a `df -P` style listing into report lines.
# Arguments:
#   $1 - label inserted into each line (e.g. 索引 or 空间)
#   $2 - integer threshold in percent; usage strictly above it is an ERROR
# Inputs:
#   stdin - df output in POSIX (-P) layout: column 5 is the usage
#           percentage, columns 6.. are the mount point.
# Outputs:
#   One line per filesystem whose device starts with /dev/.
#######################################
_report_df_usage () {
  local label=$1 threshold=$2
  local usage mount level

  # Emit "<usage> <mount point>" so that `read` can put everything after
  # the first word into ${mount}; this keeps mount points containing
  # spaces intact.
  awk '$1 ~ /^\/dev\// {
         printf "%s", $5
         for (i = 6; i <= NF; i++) printf " %s", $i
         print ""
       }' |
    while read -r usage mount; do
      level=INFO
      # df prints "-" when a filesystem reports no inode statistics; only
      # compare when the value really is a number, otherwise (( )) aborts
      # with a syntax error.
      if [[ ${usage%\%} =~ ^[0-9]+$ ]] && (( ${usage%\%} > threshold )); then
        level=ERROR
      fi
      echo "分区[${mount}]${label}使用率: $(m_c "${level}" "${usage}")"
    done
}

#######################################
# Report inode and space usage of every local /dev/* filesystem.
# Arguments:
#   $1 - inode usage threshold in percent (optional, default 70)
#   $2 - space usage threshold in percent (optional, default 70)
# Outputs:
#   A header line, then one coloured line per filesystem for inode usage,
#   then one per filesystem for space usage. Values strictly above the
#   threshold are printed at ERROR level, everything else at INFO level.
# Notes:
#   The thresholds are function-local; they are no longer exported into
#   the environment of every child process.
#######################################
check_disk () {
  local index_threshold=${1:-70}
  local data_threshold=${2:-70}

  echo ">>> 磁盘 检查结果 <<<"
  # -P forces one line per filesystem and fixed column positions.
  # inode usage
  df -liP | _report_df_usage "索引" "${index_threshold}"
  # space usage
  df -lP | _report_df_usage "空间" "${data_threshold}"
}
############################################################################################################
# check memory usage
#######################################
# Report physical memory and swap utilisation.
# Arguments:
#   $1 - path of the meminfo file to read (optional, default /proc/meminfo)
# Outputs:
#   A blank line and a header, then:
#     - if swap is in use: its utilisation (ERROR above 80%, else INFO)
#       and a WARN line with the amount of swap used in KB;
#     - physical memory utilisation (ERROR above 80%, else INFO).
#   Percentages are truncated to whole numbers.
# Returns:
#   0 on success, 1 if MemTotal cannot be read from the file.
#######################################
check_mem () {
  local meminfo_file=${1:-/proc/meminfo}
  local key value
  local MemTotal='' MemAvailable='' MemFree=0 Buffers=0 Cached=0
  local SwapTotal=0 SwapFree=0
  local mem_usage_rate swap_usage_rate swap_used

  echo
  echo ">>> 内存 检查结果 <<<"

  # Parse "Key:   value kB" lines directly instead of eval-ing text that
  # was generated from the file.
  if [[ -r ${meminfo_file} ]]; then
    while IFS=': ' read -r key value _; do
      case "${key}" in
        MemTotal)     MemTotal=${value} ;;
        MemAvailable) MemAvailable=${value} ;;
        MemFree)      MemFree=${value} ;;
        Buffers)      Buffers=${value} ;;
        Cached)       Cached=${value} ;;
        SwapTotal)    SwapTotal=${value} ;;
        SwapFree)     SwapFree=${value} ;;
      esac
    done < "${meminfo_file}"
  fi

  if ! [[ ${MemTotal} =~ ^[0-9]+$ ]] || (( MemTotal == 0 )); then
    m_c ERROR "无法读取内存信息: ${meminfo_file}" >&2
    return 1
  fi

  # Kernels that do not provide MemAvailable would leave it empty and break
  # the arithmetic below; approximate it from MemFree + Buffers + Cached.
  if ! [[ ${MemAvailable} =~ ^[0-9]+$ ]]; then
    MemAvailable=$(( MemFree + Buffers + Cached ))
  fi

  # Integer division truncates, matching the former "cut at the decimal
  # point" post-processing.
  mem_usage_rate=$(( (MemTotal - MemAvailable) * 100 / MemTotal ))

  if (( SwapTotal > 0 && SwapTotal != SwapFree )); then
    swap_used=$(( SwapTotal - SwapFree ))
    swap_usage_rate=$(( swap_used * 100 / SwapTotal ))
    if (( swap_usage_rate > 80 )); then
      echo "swap虚拟内存利用率: $(m_c ERROR "${swap_usage_rate}%")"
    else
      echo "swap虚拟内存利用率: $(m_c INFO "${swap_usage_rate}%")"
    fi
    m_c WARN "注意:swap虚拟内存已经使用[${swap_used} KB], 表明物理内存被完全使用过, 值得关注."
  fi

  if (( mem_usage_rate > 80 )); then
    echo "物理内存使用率: $(m_c ERROR "${mem_usage_rate}%, 使用率太高了, 请及时扩容.")"
  else
    echo "物理内存使用率: $(m_c INFO "${mem_usage_rate}%, 使用正常.")"
  fi
}
############################################################################################################
#######################################
# Compute CPU utilisation (whole percent) between two counter samples.
# Arguments:
#   $1 - sampling interval; accepted for interface compatibility, unused
#   $2 - the later sample: one "cpuN v1 v2 ..." line (label + counters)
#   $3 - the earlier sample, same layout
# Outputs:
#   Integer percentage of non-idle time, truncated toward zero. Prints 0
#   when the counters did not advance (identical or empty samples), which
#   previously caused a division by zero and empty output.
# Notes:
#   Only the first eight counters (user nice system idle iowait irq softirq
#   steal) are summed. The trailing guest/guest_nice counters are already
#   included in user/nice according to proc(5), so adding them again would
#   inflate the total. The 4th counter (5th word) is taken as idle time.
#######################################
compute () {
  local t2 t1 idle1 idle2 sum1 sum2 reduce_sum_var reduce_idle_var i
  local -a before after
  t2=$2
  t1=$3

  # Split on any run of whitespace; the aggregate "cpu" line has two blanks
  # after its label.
  read -r -a before <<< "${t1}"
  read -r -a after <<< "${t2}"

  sum1=0
  sum2=0
  for (( i = 1; i <= 8; i++ )); do
    sum1=$(( sum1 + ${before[i]:-0} ))
    sum2=$(( sum2 + ${after[i]:-0} ))
  done
  idle1=${before[4]:-0}
  idle2=${after[4]:-0}

  reduce_sum_var=$(( sum2 - sum1 ))
  reduce_idle_var=$(( idle2 - idle1 ))

  if (( reduce_sum_var <= 0 )); then
    echo 0
    return 0
  fi
  echo $(( (reduce_sum_var - reduce_idle_var) * 100 / reduce_sum_var ))
}
# Compute CPU utilization
#######################################
# Sample the CPU counters twice and report utilisation for the aggregate
# "cpu" line and for every individual "cpuN" line.
# Arguments:
#   $1 - seconds to wait between the two samples (optional, default 30)
#   $2 - counter file to read (optional, default /proc/stat)
# Outputs:
#   One line per CPU entry, aggregate first, each starting with a single
#   space. Values above 80% are coloured through `m_c ERROR`, others
#   through `m_c INFO`, so the per-core lines use the same colouring
#   mechanism as the total line and do not need tput.
#   Lines are separated by real newlines.
#######################################
cpu_utiliza () {
  local interval stat_file t1 t2 cpu line_t1 line_t2 value level
  local -a report
  interval=${1:-30}
  stat_file=${2:-/proc/stat}

  # First and second sample of all "cpu*" lines.
  t1=$(grep '^cpu' "${stat_file}")
  sleep "${interval}"
  t2=$(grep '^cpu' "${stat_file}")

  report=()
  while IFS= read -r line_t1; do
    cpu=${line_t1%%[[:space:]]*}
    [[ -n ${cpu} ]] || continue

    # Exact match on the label so that "cpu1" never picks up "cpu10".
    line_t2=$(awk -v id="${cpu}" '$1 == id { print; exit }' <<< "${t2}")
    # A CPU that is absent from the second sample cannot be measured.
    [[ -n ${line_t2} ]] || continue

    value=$(compute "${interval}" "${line_t2}" "${line_t1}")
    value=${value%.*}
    # Guard the arithmetic test below against empty or malformed results.
    [[ ${value} =~ ^-?[0-9]+$ ]] || value=0

    if (( value > 80 )); then
      level=ERROR
    else
      level=INFO
    fi

    if [[ ${cpu} == "cpu" ]]; then
      # aggregate utilisation across all CPUs
      report+=(" cpu总使用率: $(m_c "${level}" "${value}%")")
    else
      # utilisation of a single CPU
      report+=(" ${cpu}使用率: $(m_c "${level}" "${value}%")")
    fi
  done <<< "${t1}"

  if (( ${#report[@]} > 0 )); then
    printf '%s\n' "${report[@]}"
  fi
}
# Check CPU cores, main frequencies, current usage and load
#######################################
# Report CPU core count, clock frequency, load average and utilisation.
# Arguments:
#   $1 - seconds over which utilisation is sampled (optional, default 30);
#        forwarded to cpu_utiliza.
# Outputs:
#   A blank line, a header, then one line each for core count, frequency
#   and load, followed by the utilisation report from cpu_utiliza.
#   - Frequency: WARN when /proc/cpuinfo lists more than one distinct
#     "cpu MHz" value, otherwise INFO with that value.
#   - Load: ERROR when any of the 1/5/15 minute averages exceeds
#     0.7 x core count, otherwise INFO.
#######################################
check_cpu () {
  local cores_cnt freq freq_cnt load_threshold load_value cpu_util interval
  local load1 load5 load15
  local freq_level freq_text load_level load_text
  interval=${1:-30}

  echo
  echo ">>> CPU 检查结果 <<<"

  # cpu cores: anchored so only "processor : N" entries are counted, not
  # every line that merely contains the word.
  cores_cnt=$(grep -c '^processor' /proc/cpuinfo)

  # cpu main frequency: the distinct "cpu MHz" values
  freq=$(awk '/cpu MHz/ { print $NF }' /proc/cpuinfo | sort -n | uniq)
  freq_cnt=$(wc -l <<< "${freq}")
  if (( freq_cnt > 1 )); then
    freq_level=WARN
    freq_text="CPU主频不一致, 详情执行命令[ grep 'cpu MHz' /proc/cpuinfo ]进行确认."
  else
    freq_level=INFO
    freq_text="${freq} MHz"
  fi

  # cpu load: threshold is 0.7 per core
  load_threshold=$(awk -v cc="${cores_cnt}" 'BEGIN { print cc * 0.7 }')
  # Read the three averages straight from the kernel instead of parsing the
  # human-oriented output of `w`.
  read -r load1 load5 load15 _ < /proc/loadavg
  load_value="${load1}, ${load5}, ${load15}"
  # "+ 0" forces numeric comparison; awk exits 0 when any average is above
  # the threshold.
  if awk -v a="${load_threshold}" -v l1="${load1}" -v l5="${load5}" \
       -v l15="${load15}" \
       'BEGIN { exit !((l1 + 0 > a + 0) || (l5 + 0 > a + 0) || (l15 + 0 > a + 0)) }'; then
    load_level=ERROR
    load_text="CPU 1分钟,5分钟,15分钟负载[${load_value}], 超过正常阀值[${load_threshold}], 可能影响机器性能,请注意."
  else
    load_level=INFO
    load_text="CPU 1分钟,5分钟,15分钟负载[${load_value}] ,数值正常."
  fi

  # cpu usage rate (blocks for ${interval} seconds)
  cpu_util=$(cpu_utiliza "${interval}")

  # summary: level and text are passed as separate, quoted arguments so the
  # message is not subject to word splitting or pathname expansion here.
  echo "cpu 核数: $(m_c INFO "${cores_cnt}")"
  echo "cpu 主频: $(m_c "${freq_level}" "${freq_text}")"
  echo "cpu 负载: $(m_c "${load_level}" "${load_text}")"
  echo "cpu 使用率(统计${interval}s): "
  # %b keeps expanding backslash escapes (e.g. "\n") that may be embedded
  # in the report, as `echo -e` did.
  printf '%b\n' "${cpu_util}"
}
############################################################################################################
# Entry point: run the disk, memory and CPU checks one after another,
# in that order.
for check in check_disk check_mem check_cpu; do
  "${check}"
done
执行结果如下:
总结:整理不易,如果对你有帮助,可否点赞关注一下?
更多详细内容请参考:Linux运维实战总结