这个脚本的主要目的是监控机器上CPU和内存的使用情况;如果使用率超过设定的阈值,就打印出占用CPU或者内存资源较多的进程信息(这一处理方式也可以替换成其它方式)。
主要功能如下:
1)将内存和CPU资源的监控情况记录到日志文件中
2)控制日志文件大小:周期性(每30天)检查日志文件大小,当日志文件超过一定大小后自动清理日志文件。
声明: 下面获取内存和CPU使用情况的代码参考《Linux.Shell编程从入门到精通》最后一个章节的部分代码,如有侵犯,请告知,谢谢。
#! /bin/bash
# This script checks host status (memory usage, CPU usage and so on).
# 20171024 -- first version
# Log file passed to every check/monitor function below.
LOG_FILE="/tmp/monitor_log"
# Seconds slept between the two CPU samples and between monitoring rounds.
check_interval=10
# Memory usage (percent) above which the top memory consumers are reported.
max_usage_ration_memory=80
# CPU usage (percent) above which the top CPU consumers are reported.
max_usage_ration_cpu=90
# Upper bound on how many processes are listed in such a report.
top_process_number=5
#######################################
# Reset a log file once it has grown past a size limit.
# Arguments:
#   $1 - path of the log file to inspect
#   $2 - optional size limit in bytes (default: 1048576, i.e. 1 MiB)
# Outputs:
#   Prints the current size to stdout. When the limit is exceeded, a
#   clean-up notice is printed to stdout and becomes the file's only content.
# Returns:
#   0 normally (also when the file does not exist),
#   1 if the file size cannot be read.
#######################################
function check_log_size() {
  local log_file=$1
  local max_size=${2:-1048576}
  local file_size

  [[ -f "$log_file" ]] || return 0

  # wc -c yields the byte count directly; parsing the human-readable output
  # of stat depends on the locale and on the stat implementation.
  if ! file_size=$(wc -c < "$log_file"); then
    echo "cannot read size of $log_file" >&2
    return 1
  fi
  # Some wc implementations pad the number with blanks.
  file_size=${file_size//[[:space:]]/}
  echo "current log file size : $file_size"

  if [[ "$file_size" -gt "$max_size" ]]; then
    # tee without -a truncates the file, so the notice replaces old records.
    echo "Clean old records as log file($log_file) is too big $(date)" | tee "$log_file"
  fi
  return 0
}
#######################################
# Report the processes with the highest memory share (%MEM column of ps aux).
# Globals:
#   top_process_number (read) - maximum number of processes to list (default 5)
# Arguments:
#   $1 - log file the report is appended to
# Outputs:
#   Process count and timestamp go to stdout only; the report heading, the
#   ps header line and the selected processes go to stdout and the log file.
# Returns:
#   0 on success, 1 if ps produced no output.
#######################################
function disply_top_memory_process_information() {
  local log_file=$1
  local limit=${top_process_number:-5}
  local snapshot header process_count shown

  # One snapshot, so that the count and the listing describe the same state.
  if ! snapshot=$(ps aux) || [[ -z "$snapshot" ]]; then
    echo "cannot obtain process list from ps" >&2
    return 1
  fi
  header=${snapshot%%$'\n'*}

  # The first line of ps aux is the column header, not a process.
  process_count=$(printf '%s\n' "$snapshot" | awk 'NR > 1 { n++ } END { print n + 0 }')
  echo "current process number is $process_count"

  shown=$process_count
  if [[ "$shown" -gt "$limit" ]]; then
    shown=$limit
  fi

  echo "the most $shown process costing memory resouces are as follows: " | tee -a "$log_file"
  {
    printf '%s\n' "$header"
    # -k 4,4nr restricts the key to column 4 and makes the comparison numeric;
    # the header is excluded beforehand so it can never displace a process.
    printf '%s\n' "$snapshot" | tail -n +2 | LC_ALL=C sort -k 4,4nr | head -n "$shown"
  } | tee -a "$log_file"
  echo "timestamp is $(date)"
}
#######################################
# Log the current memory usage and report the top consumers when it is high.
# Usage is derived from MemAvailable; when that field is absent it falls back
# to MemFree + Buffers + Cached. MemFree alone would count reclaimable page
# cache as used memory and overstate the usage.
# Globals:
#   max_usage_ration_memory (read) - alert threshold in percent (default 80)
# Arguments:
#   $1 - log file the usage line is appended to
#   $2 - optional meminfo-format file to read (default: /proc/meminfo)
# Outputs:
#   Usage line to stdout and the log file; alert text to stdout.
# Returns:
#   0 if usage is within the threshold, 1 if it exceeds the threshold,
#   2 if the total memory cannot be determined.
#######################################
function monitor_memory() {
  local log_file=$1
  local meminfo_file=${2:-/proc/meminfo}
  local limit=${max_usage_ration_memory:-80}
  local meminfo memory_total memory_available memory_usage

  # A single awk pass extracts every needed field; %.0f keeps large values
  # in plain integer notation.
  meminfo=$(awk '
    $1 == "MemTotal:"     { total = $2 }
    $1 == "MemAvailable:" { available = $2; has_available = 1 }
    $1 == "MemFree:"      { free = $2 }
    $1 == "Buffers:"      { buffers = $2 }
    $1 == "Cached:"       { cached = $2 }
    END {
      if (!has_available) available = free + buffers + cached
      printf "%.0f %.0f\n", total, available
    }' "$meminfo_file") || meminfo=""
  read -r memory_total memory_available <<< "$meminfo"

  # Guard the division below against a missing or zero total.
  if [[ ! "${memory_total:-0}" -gt 0 ]]; then
    echo "cannot determine total memory from $meminfo_file" >&2
    return 2
  fi

  memory_usage=$((100 - memory_available * 100 / memory_total))
  echo "Current memory usage: $memory_usage at $(date)" | tee -a "$log_file"

  if [[ "$memory_usage" -gt "$limit" ]]; then
    echo "Current Memory usage is too high "
    disply_top_memory_process_information "$log_file"
    return 1
  fi
  return 0
}
#######################################
# Report the processes with the highest CPU share (%CPU column of ps aux).
# Globals:
#   top_process_number (read) - maximum number of processes to list (default 5)
# Arguments:
#   $1 - log file the report is appended to
# Outputs:
#   Process count and timestamp go to stdout only; the report heading, the
#   ps header line and the selected processes go to stdout and the log file.
# Returns:
#   0 on success, 1 if ps produced no output.
#######################################
function disply_top_cpu_process_information() {
  local log_file=$1
  local limit=${top_process_number:-5}
  local snapshot header process_count shown

  # One snapshot, so that the count and the listing describe the same state.
  if ! snapshot=$(ps aux) || [[ -z "$snapshot" ]]; then
    echo "cannot obtain process list from ps" >&2
    return 1
  fi
  header=${snapshot%%$'\n'*}

  # The first line of ps aux is the column header, not a process.
  process_count=$(printf '%s\n' "$snapshot" | awk 'NR > 1 { n++ } END { print n + 0 }')
  echo "current process number is $process_count"

  shown=$process_count
  if [[ "$shown" -gt "$limit" ]]; then
    shown=$limit
  fi

  echo "the most $shown process costing cpu resouces are as follows: " | tee -a "$log_file"
  {
    printf '%s\n' "$header"
    # Column 3 is %CPU (column 4 would be %MEM); -k 3,3nr restricts the key
    # to that column and makes the comparison numeric.
    printf '%s\n' "$snapshot" | tail -n +2 | LC_ALL=C sort -k 3,3nr | head -n "$shown"
  } | tee -a "$log_file"
  echo "timestamp is $(date)"
}
#######################################
# Sum the busy and idle tick counters of all per-CPU lines (cpu0, cpu1, ...).
# Busy   = user + nice + system + irq + softirq + steal (fields 2,3,4,7,8,9)
# Idle   = idle + iowait                                 (fields 5,6)
# Interrupt handling is work done by the CPU, so it is counted as busy.
# Fields missing from the input evaluate to 0 in awk.
# Arguments:
#   $1 - optional stat-format file to read (default: /proc/stat)
# Outputs:
#   One line "<busy> <idle>" on stdout.
# Returns:
#   Exit status of awk.
#######################################
function get_cpu_info() {
  local stat_file=${1:-/proc/stat}

  # printf "%.0f" keeps large sums in plain integer notation; a bare print
  # may switch to exponent form on some awk implementations.
  awk '
    /^cpu[0-9]+/ {
      used   += $2 + $3 + $4 + $7 + $8 + $9
      unused += $5 + $6
    }
    END { printf "%.0f %.0f\n", used, unused }
  ' "$stat_file"
}
#######################################
# Sample the CPU counters twice, log the usage over that interval and report
# the top consumers when it is high.
# Globals:
#   check_interval (read)       - seconds between the two samples (default 10)
#   max_usage_ration_cpu (read) - alert threshold in percent (default 90)
# Arguments:
#   $1 - log file the usage line is appended to
# Outputs:
#   Usage line to stdout and the log file; alert text to stdout.
# Returns:
#   0 if usage is within the threshold, 1 if it exceeds the threshold,
#   2 if no usage can be computed from the two samples.
#######################################
function monitor_cpu() {
  local log_file=$1
  local limit=${max_usage_ration_cpu:-90}
  local cpu_info_1 cpu_info_2 cpu_usage

  cpu_info_1=$(get_cpu_info)
  sleep "${check_interval:-10}"
  cpu_info_2=$(get_cpu_info)

  # Each sample is "<used> <unused>". The awk program refuses to divide when
  # the total delta is not positive (identical or missing samples).
  cpu_usage=$(printf '%s %s\n' "$cpu_info_1" "$cpu_info_2" | awk '{
    used  = $3 - $1
    total = $3 + $4 - $1 - $2
    if (total <= 0) exit 1
    print int(used * 100 / total)
  }') || cpu_usage=""

  if [[ -z "$cpu_usage" ]]; then
    echo "cannot compute cpu usage from samples '$cpu_info_1' and '$cpu_info_2'" >&2
    return 2
  fi

  echo "Current cpu usage: $cpu_usage at $(date)" | tee -a "$log_file"

  if [[ "$cpu_usage" -gt "$limit" ]]; then
    echo "Current CPU usage is too high"
    disply_top_cpu_process_information "$log_file"
    return 1
  fi
  return 0
}
# Main loop: check the log size once at start-up, then monitor forever.
#
# The log size is re-checked every 30 days of elapsed time. Elapsed time is
# taken from bash's SECONDS counter instead of counting loop rounds: a round
# lasts longer than check_interval because monitor_cpu itself sleeps between
# its two samples, so a round counter cannot be converted to days reliably.
log_review_period=$((30 * 24 * 3600))

check_log_size "$LOG_FILE"
last_log_review=$SECONDS

while true; do
  if ((SECONDS - last_log_review >= log_review_period)); then
    check_log_size "$LOG_FILE"
    last_log_review=$SECONDS
  fi
  monitor_memory "$LOG_FILE"
  monitor_cpu "$LOG_FILE"
  sleep "$check_interval"
done