《Linux运维总结:Shell脚本实现监控磁盘、CPU、内存占用情况》

一、前言

本文主要介绍如何通过 Shell 脚本监控磁盘、内存和 CPU 的使用情况。


二、脚本

#!/bin/bash
# Print a message wrapped in an ANSI color selected by a severity mark.
# Arguments: $1   - mark: INFO (green), WARN (yellow, prefixed "[提醒] ")
#                   or ERROR (red, prefixed "[重要] ")
#            $2.. - message words; they are joined with single spaces
# Outputs:   the colored message on stdout. An unknown mark prints a red
#            parameter-error message instead of the caller's text.
m_c () {
    local mark message
    mark=${1-}
    if (( $# > 0 )); then
        shift
    fi

    # Plain ANSI escapes are used in every branch (no tput) so the function
    # also works without a terminal or a usable TERM, e.g. when run from cron.
    case ${mark} in
        INFO)
            message="\033[32m$*\033[0m"
            ;;
        WARN)
            message="\033[33m[提醒] $*\033[0m"
            ;;
        ERROR)
            message="\033[31m[重要] $*\033[0m"
            ;;
        *)
            message="\033[31m参数[${mark}]输入错误, 请检查\033[0m"
            ;;
    esac

    # Quoted on purpose: an unquoted expansion would collapse whitespace and
    # glob-expand characters such as '*' that appear in the message.
    printf '%b\n' "${message}"
}
############################################################################################################
# Helper for check_disk: print one colored usage line per local /dev/* filesystem.
# Arguments: $1   - label printed after the mount point
#            $2   - threshold in percent; usage above it is reported as ERROR
#            $3.. - options passed to df
_check_disk_report () {
    local label=$1 threshold=$2
    shift 2
    local usage mount pct

    # With -P every filesystem is one line and the columns are fixed:
    # field 5 is the usage percentage, fields 6..NF are the mount point
    # (re-joined so that mount points containing spaces stay intact).
    df "$@" \
        | awk '/^\/dev\// { m = $6; for (i = 7; i <= NF; i++) m = m " " $i; print $5, m }' \
        | while read -r usage mount; do
            pct=${usage%\%}
            # df prints "-" when a filesystem has no inode accounting; such a
            # value is never above the threshold and must not reach (( )).
            if [[ ${pct} =~ ^[0-9]+$ ]] && (( pct > threshold )); then
                echo "分区[${mount}]${label}: $(m_c ERROR "${usage}")"
            else
                echo "分区[${mount}]${label}: $(m_c INFO "${usage}")"
            fi
        done
}

# Check inode and space usage of every local filesystem mounted from /dev/*.
# Globals:   index_threshold, data_threshold (written and exported)
# Arguments: $1 - inode usage threshold in percent (optional, default 70)
#            $2 - space usage threshold in percent (optional, default 70)
# Outputs:   a header line, then one line per filesystem for inode usage and
#            one line per filesystem for space usage, colored through m_c.
check_disk () {
    echo ">>> 磁盘 检查结果 <<<"
    export index_threshold=${1:-70}
    export data_threshold=${2:-70}

    # inode usage
    _check_disk_report "索引使用率" "${index_threshold}" -Pli

    # space usage
    _check_disk_report "空间使用率" "${data_threshold}" -Pl
}
############################################################################################################
# Report physical memory and swap usage.
# Arguments: $1 - path of the meminfo file (optional, default /proc/meminfo)
# Outputs:   an empty line, a header, optional swap lines (only when some
#            swap is in use) and the physical memory usage line.
# Returns:   1 (with a message on stderr) when MemTotal cannot be read.
check_mem () {
    local meminfo=${1:-/proc/meminfo}
    local MemTotal="" MemAvailable="" MemFree=0 Buffers=0 Cached=0
    local SwapTotal=0 SwapFree=0
    local key value rest mem_usage_rate swap_usage_rate swap_used

    echo
    echo ">>> 内存 检查结果 <<<"

    # Parse "Key:   value kB" lines directly instead of eval-ing generated text.
    if [[ -r ${meminfo} ]]; then
        while IFS=': ' read -r key value rest; do
            case ${key} in
                MemTotal|MemAvailable|MemFree|Buffers|Cached|SwapTotal|SwapFree)
                    printf -v "${key}" '%s' "${value}"
                    ;;
            esac
        done < "${meminfo}"
    fi

    if ! [[ ${MemTotal} =~ ^[0-9]+$ ]] || (( MemTotal == 0 )); then
        echo "check_mem: cannot read MemTotal from ${meminfo}" >&2
        return 1
    fi

    # When the MemAvailable field is missing (older kernels may not provide
    # it), approximate it; otherwise the usage would be reported as 100%.
    if ! [[ ${MemAvailable} =~ ^[0-9]+$ ]]; then
        MemAvailable=$(( MemFree + Buffers + Cached ))
    fi

    # Integer arithmetic truncates the percentage, like the previous
    # "take the part before the dot" approach, without parsing float text.
    mem_usage_rate=$(( (MemTotal - MemAvailable) * 100 / MemTotal ))

    swap_used=$(( SwapTotal - SwapFree ))
    if (( SwapTotal > 0 && swap_used != 0 )); then
        swap_usage_rate=$(( swap_used * 100 / SwapTotal ))
        if (( swap_usage_rate > 80 )); then
            echo "swap虚拟内存利用率: $(m_c ERROR "${swap_usage_rate}%")"
        else
            echo "swap虚拟内存利用率: $(m_c INFO "${swap_usage_rate}%")"
        fi
        m_c WARN "注意:swap虚拟内存已经使用[${swap_used} KB], 表明物理内存被完全使用过, 值得关注."
    fi

    if (( mem_usage_rate > 80 )); then
        echo "物理内存使用率: $(m_c ERROR "${mem_usage_rate}%, 使用率太高了, 请及时扩容.")"
    else
        echo "物理内存使用率: $(m_c INFO "${mem_usage_rate}%, 使用正常.")"
    fi
}
############################################################################################################
# Compute the CPU utilization between two samples of one /proc/stat cpu line.
# Arguments: $1 - sampling interval (accepted for interface compatibility;
#                 not used in the calculation)
#            $2 - the later sample, e.g. "cpu0 20 0 20 160 ..."
#            $3 - the earlier sample of the same cpu line
# Outputs:   the truncated integer percentage of non-idle time on stdout;
#            0 when the counters did not advance between the samples.
compute () {
    local inter=${1-} t2=${2-} t1=${3-}
    local -a before after
    local i sum1=0 sum2=0 delta_sum delta_idle

    # Word 0 is the cpu label; every following word is a time counter.
    read -r -a before <<< "${t1}"
    read -r -a after <<< "${t2}"

    for (( i = 1; i < ${#before[@]}; i++ )); do
        sum1=$(( sum1 + before[i] ))
    done
    for (( i = 1; i < ${#after[@]}; i++ )); do
        sum2=$(( sum2 + after[i] ))
    done

    delta_sum=$(( sum2 - sum1 ))
    # The 5th word of the line (index 4) is the idle counter.
    delta_idle=$(( ${after[4]:-0} - ${before[4]:-0} ))

    # Identical samples (or empty input) would otherwise divide by zero.
    if (( delta_sum <= 0 )); then
        echo 0
        return 0
    fi

    echo $(( (delta_sum - delta_idle) * 100 / delta_sum ))
}

# Measure CPU utilization over an interval for the aggregate "cpu" line and
# for every per-core line of /proc/stat.
# Arguments: $1 - seconds to wait between the two samples (optional,
#                 default 30)
# Outputs:   one line for the total and one line per core, each colored
#            through m_c (ERROR above 80%, INFO otherwise), separated by
#            real newlines.
cpu_utiliza () {
    local interval=${1:-30}
    local t1 t2 cpu cpu_t1 cpu_t2 value level

    # Two snapshots of all cpu lines, taken ${interval} seconds apart.
    t1=$(grep '^cpu' /proc/stat)
    sleep "${interval}"
    t2=$(grep '^cpu' /proc/stat)

    # The aggregate "cpu" line comes first in /proc/stat, so iterating over
    # the first snapshot reports the total first and then each core.
    while IFS= read -r cpu_t1; do
        cpu=${cpu_t1%% *}
        [[ -n ${cpu} ]] || continue

        # Exact match on the label so that cpu1 never picks up cpu10.
        cpu_t2=$(awk -v c="${cpu}" '$1 == c' <<< "${t2}")
        # A core missing from the second snapshot cannot be measured.
        [[ -n ${cpu_t2} ]] || continue

        value=$(compute "${interval}" "${cpu_t2}" "${cpu_t1}")
        value=${value%.*}
        value=${value:-0}

        if (( value > 80 )); then
            level=ERROR
        else
            level=INFO
        fi

        if [[ ${cpu} == cpu ]]; then
            echo "  cpu总使用率: $(m_c "${level}" "${value}%")"
        else
            echo "  ${cpu}使用率: $(m_c "${level}" "${value}%")"
        fi
    done <<< "${t1}"
}

# Check CPU core count, clock frequencies, load averages and utilization.
# Arguments: $1 - seconds over which utilization is sampled (optional,
#                 default 30); passed on to cpu_utiliza
# Outputs:   an empty line, a header, then the core count, frequency, load
#            and utilization lines, colored through m_c.
check_cpu () {
    local interval=${1:-30}
    local cores_cnt freq freq_cnt freq_level freq_text
    local load_threshold load_value load_level load_text l1 l5 l15 rest
    local cpu_util

    echo
    echo ">>> CPU 检查结果 <<<"

    # cpu cores: anchored so that a model name containing the word
    # "processor" is not counted as an additional core.
    cores_cnt=$(grep -c '^processor' /proc/cpuinfo)

    # cpu main freq: the distinct "cpu MHz" values across all cores
    freq=$(awk '/^cpu MHz/ { print $NF }' /proc/cpuinfo | sort -n | uniq)
    freq_cnt=$(echo "${freq}" | wc -l)
    if (( freq_cnt > 1 )); then
        freq_level=WARN
        freq_text="CPU主频不一致, 详情执行命令[ grep 'cpu MHz' /proc/cpuinfo ]进行确认."
    else
        freq_level=INFO
        freq_text="${freq:+${freq} }MHz"
    fi

    # cpu load: each core should stay at or below 0.7
    load_threshold=$(awk -v cc="${cores_cnt}" 'BEGIN { print cc * 0.7 }')
    # /proc/loadavg starts with the 1, 5 and 15 minute averages; reading it
    # avoids parsing the human-oriented output of w.
    read -r l1 l5 l15 rest < /proc/loadavg
    load_value="${l1}, ${l5}, ${l15}"
    if awk -v a="${load_threshold}" -v x="${l1}" -v y="${l5}" -v z="${l15}" \
        'BEGIN { exit !((x + 0 > a + 0) || (y + 0 > a + 0) || (z + 0 > a + 0)) }'; then
        load_level=ERROR
        load_text="CPU 1分钟,5分钟,15分钟负载[${load_value}], 超过正常阀值[${load_threshold}], 可能影响机器性能,请注意."
    else
        load_level=INFO
        load_text="CPU 1分钟,5分钟,15分钟负载[${load_value}] ,数值正常."
    fi

    # cpu usage rate (blocks for ${interval} seconds while sampling)
    cpu_util=$(cpu_utiliza "${interval}")

    # summary
    echo "cpu 核数: $(m_c INFO "${cores_cnt}")"
    echo "cpu 主频: $(m_c "${freq_level}" "${freq_text}")"
    echo "cpu 负载: $(m_c "${load_level}" "${load_text}")"
    echo "cpu 使用率(统计${interval}s): "
    # -e keeps working with a cpu_utiliza that separates lines with a
    # literal "\n" sequence.
    echo -e "${cpu_util}"
}
############################################################################################################
# Entry point: run the disk, memory and CPU checks, in that order.
main () {
    check_disk
    check_mem
    check_cpu
}
main

执行结果如下:
在这里插入图片描述


总结:整理不易,如果对你有帮助,可否点赞关注一下?

更多详细内容请参考:Linux运维实战总结

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

东城绝神

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值