#!/bin/bash
#############################################
# Define Variables
#############################################
## Display name and version of this monitor script (printed in the banner).
PROC_MONITOR='Program Server Monitor'
MONITOR_VER='(1.4)'
## Program installation paths.
PROC_HOME='/home/oracle/project'            ## Program home directory
PROC_BIN="${PROC_HOME}/bin"                 ## directory holding the programs
## Facts about this script invocation.
SELF_PID="$$"                               ## PID of the running script
SELF_NAME='proc_monitor.sh'                 ## file name of this script
MONITOR_PARAM="$1"                          ## first command-line argument
MONITOR_LOG="${PROC_BIN}/proc_monitor.log"  ## log file written by the monitor
## Process lookup and configuration files.
PROC_KEY='Server'                           ## keyword used to filter ps output
TMP_FILE="${PROC_BIN}/${PROC_KEY}.ps"       ## scratch file for ps query results
PROC_CFG="${PROC_BIN}/proc_config.cfg"      ## programs to check (name arg1 ...)
CRON_CFG="${PROC_BIN}/proc_cron.cfg"        ## crontab definition to install
SLEEP_VAL=3                                 ## seconds between monitor cycles
## Load the environment for the managed programs (per the original note this
## file sets LD_LIBRARY_PATH; its contents are not visible here).
source "${PROC_BIN}/proc.bashrc"
#############################################
# Define Functions
#############################################
# show the monitor's version
#############################################
show_version() {
  # Print the monitor banner: name/version, current time, this script's PID
  # and the first command-line parameter.
  # Globals: PROC_MONITOR, MONITOR_VER, SELF_PID, MONITOR_PARAM (all read).
  # Values are expanded inside quotes so that a parameter such as '*' is
  # printed literally instead of being glob-expanded by the shell.
  echo " "
  echo "========================================="
  echo " ${PROC_MONITOR} ${MONITOR_VER}"
  echo "========================================="
  echo " "
  echo " Current Time :  $(date +'%Y-%m-%d %T')"
  echo " Process ID :  ${SELF_PID}"
  echo " Parameters :  ${MONITOR_PARAM}"
  echo "========================================="
}
#############################################
#
# Start a user program in the background, detached with nohup.
# Arguments: $1 - program name; $2... - its arguments. Every argument is
#            forwarded, so the started command line matches the complete
#            configured entry.
# Globals:   PROC_BIN (read; becomes the working directory),
#            MONITOR_LOG (appended)
# Returns:   1 when PROC_BIN cannot be entered (nothing is started), else 0
#
#############################################
run_proc() {
  echo " Run: $*"
  # Do not launch from an unexpected directory if PROC_BIN is unavailable.
  if ! cd "${PROC_BIN}"; then
    echo " Run: cannot enter ${PROC_BIN}, $1 not started" >&2
    return 1
  fi
  echo "$(date +'%Y-%m-%d %T') exec nohup $* " >> "${MONITOR_LOG}"
  # Output is discarded; the job keeps running after the monitor exits.
  nohup "$@" > /dev/null 2>&1 &
}
#############################################
#
# Compare two (name, argument) pairs as literal strings.
# Arguments: $1 $2 - first pair; $3 $4 - second pair
# Returns:   0 when $1 equals $3 and $2 equals $4, otherwise 1
#
#############################################
comp_str() {
  # Both sides are quoted: an unquoted right-hand side inside [[ ]] would be
  # treated as a glob pattern rather than compared literally.
  [[ "$1" == "$3" && "$2" == "$4" ]]
}
#############################################
#
# Check whether a program with the given command line is running.
# Arguments: program name followed by its arguments; they are joined with
#            single spaces and searched for as a literal substring of each
#            `ps -ef` line.
# Returns:   1 when a matching process exists, 0 otherwise
#            (note: inverted with respect to the usual shell convention;
#            callers test for "== 1").
#
#############################################
check_proc_is_running() {
  local matches
  # -F: the command line is matched literally, so '.' or '*' in a program
  # name is not interpreted as a regular expression. '--' protects against a
  # search string that starts with '-'.
  matches=$(ps -ef | grep -F -- "$*" | grep -v grep)
  if [[ -z "${matches}" ]]; then
    return 0
  fi
  return 1
}
#############################################
#
# Print the usage text listing the supported sub-commands.
#
#############################################
show_help() {
  local -a usage_lines=(
    " "
    " [ Usage ]:"
    " start - initialize crontab job, and run Program programs"
    " monitor - monitor(check | run) all Program programs"
    " check - check all Program programs, [default]"
    " stop - stop all Program programs"
    " clean - clean all Program log files and so on"
    " help - show help"
    " "
  )
  # One usage line per output line.
  printf '%s\n' "${usage_lines[@]}"
}
#############################################
#
# Monitor pass: list the running programs, read the configured programs,
# and start every configured program that is not found running.
# Globals:   PROC_KEY, PROC_CFG, PROC_MONITOR (read)
# Calls:     check_proc_is_running, run_proc
# Outputs:   a progress report on stdout
#
#############################################
monitor_programs() {
  local -a conf_proc=()
  local -a proc_words=()
  local procname
  local conf_num=0
  local new_run=0
  local i

  ## Show what ps currently reports for the program keyword.
  echo " [ Current Programs Running Information ]"
  echo "-----------------------------------------"
  ps -ef | grep -- "${PROC_KEY}" | grep -v grep | awk '{print $8,$9}'
  echo " "

  ## Load the configuration, skipping blank lines. The "|| [[ -n ... ]]"
  ## part keeps a final line that has no trailing newline.
  echo " [ Program Config File ] "
  echo "-----------------------------------------"
  while read -r procname || [[ -n "${procname}" ]]; do
    if [[ -z "${procname}" ]]; then
      continue
    fi
    conf_proc+=("${procname}")
    echo " ${#conf_proc[@]} ) ${procname}"
  done < "${PROC_CFG}"
  conf_num=${#conf_proc[@]}
  echo "There are ${conf_num} programs in ${PROC_CFG} file."
  echo " "

  echo " [ checking programs information ]"
  echo "-----------------------------------------"
  ## Main loop: check each configured entry and restart it when missing.
  for (( i = 0; i < conf_num; i++ )); do
    # Split the entry into words without glob expansion.
    read -r -a proc_words <<< "${conf_proc[i]}"
    check_proc_is_running "${proc_words[@]}"
    # check_proc_is_running returns 1 when the process exists.
    if [[ $? == 1 ]]; then
      echo " $(( i + 1 )) ) : ${conf_proc[i]} is OK."
    else
      ## Process missing or abnormal: start it again.
      echo " $(( i + 1 )) ) : ${conf_proc[i]} not exist or exception, will restart run..."
      run_proc "${proc_words[@]}"
      (( new_run += 1 ))
    fi
  done

  echo "========================================="
  echo " New run ${new_run} program(s)."
  echo " "
  if (( new_run != 0 )); then
    # Give the freshly started programs a moment before listing them.
    sleep 3
    echo " [ Current Programs Running Information ]"
    echo "-----------------------------------------"
    ps -ef | grep -- "${PROC_KEY}" | grep -v grep | awk '{print $2,$8,$9}'
  fi
  echo " "
  echo " ${PROC_MONITOR} Exit."
}
#############################################
#
# Initialise: install the user's crontab from CRON_CFG and list the result.
# Globals:   CRON_CFG (read)
# Returns:   1 when CRON_CFG is empty or not a readable file (crontab is not
#            invoked in that case), otherwise the status of the final echo
#
#############################################
init_monitor_config() {
  echo " Initialize monitor config -- create user crontab job"
  # Without this guard an empty CRON_CFG would run a bare `crontab`, which
  # reads the new table from standard input.
  if [[ -z "${CRON_CFG}" || ! -r "${CRON_CFG}" ]]; then
    echo " crontab config file not readable: ${CRON_CFG}" >&2
    return 1
  fi
  crontab "${CRON_CFG}"
  crontab -l
  echo " "
}
#############################################
#
# Remove the scheduled monitor job by deleting the user's crontab
# (`crontab -r` removes the whole table, not a single entry).
#
#############################################
clear_monitor_config() {
  printf '%s\n' " clear monitor config -- delete user crontab job"
  crontab -r
  printf '%s\n' " "
}
#############################################
#
# Kill one program instance and record it in the monitor log.
# Arguments: $1 - program name; $2 - its first argument; $3 - PID to kill
# Globals:   MONITOR_LOG (appended)
# Returns:   1 when $3 is not a decimal PID (nothing is killed or logged),
#            otherwise the status of `kill`
#
#############################################
kill_proc() {
  local pid=$3
  # A missing or malformed PID would otherwise run `kill` with no or
  # unintended operands.
  if [[ ! "${pid}" =~ ^[0-9]+$ ]]; then
    echo "-- kill skipped: invalid pid '${pid}' for $1 $2" >&2
    return 1
  fi
  echo "-- kill ${pid} $1 $2"
  echo "$(date +'%Y-%m-%d %T') kill ${pid} $1 $2 " >> "${MONITOR_LOG}"
  kill "${pid}"
}
#############################################
#
# Stop pass: kill the running instance of every configured program.
# A running process matches a configured entry when its command name and
# first argument (columns 8 and 9 of `ps -ef`) equal the first two words of
# the entry; an entry without arguments matches a process without arguments.
# For each entry, only the first matching process is killed.
# Globals:   PROC_KEY, PROC_CFG, TMP_FILE, PROC_MONITOR (read);
#            TMP_FILE is created and removed again
# Calls:     comp_str, kill_proc
#
#############################################
stop_programs() {
  local -a conf_proc=()
  local -a isrun_proc=()
  local -a want=()
  local -a have=()
  local line run_name run_arg run_pid
  local kill_run=0
  local i y

  ## Snapshot "command first-argument pid" of matching processes to a file.
  ps -ef | grep -- "${PROC_KEY}" | grep -v grep | awk '{print $8,$9,$2}' > "${TMP_FILE}"
  echo " [ Current Programs Running Information ]"
  echo "-----------------------------------------"
  ## Load the snapshot line by line.
  while read -r line || [[ -n "${line}" ]]; do
    isrun_proc+=("${line}")
    echo " ${#isrun_proc[@]} ) ${line}"
  done < "${TMP_FILE}"
  echo "There are ${#isrun_proc[@]} programs."
  echo " "

  ## Load the configuration, skipping blank lines and keeping a final line
  ## that has no trailing newline.
  echo " [ Program Config File ] "
  echo "-----------------------------------------"
  while read -r line || [[ -n "${line}" ]]; do
    if [[ -z "${line}" ]]; then
      continue
    fi
    conf_proc+=("${line}")
    echo " ${#conf_proc[@]} ) ${line}"
  done < "${PROC_CFG}"
  echo "There are ${#conf_proc[@]} programs in ${PROC_CFG} file."
  echo " "

  echo " [ Kill Program Server Programs ]"
  ## Main loop: for each configured entry find a running match and kill it.
  for (( i = 0; i < ${#conf_proc[@]}; i++ )); do
    read -r -a want <<< "${conf_proc[i]}"
    for (( y = 0; y < ${#isrun_proc[@]}; y++ )); do
      read -r -a have <<< "${isrun_proc[y]}"
      # The PID is always the last field; the argument column may be empty,
      # in which case the snapshot line has only two words.
      case ${#have[@]} in
        2) run_name=${have[0]}; run_arg='';          run_pid=${have[1]} ;;
        3) run_name=${have[0]}; run_arg=${have[1]};  run_pid=${have[2]} ;;
        *) continue ;;
      esac
      # Pass explicit, quoted fields so the four positions stay aligned even
      # when an argument is empty.
      if comp_str "${want[0]}" "${want[1]:-}" "${run_name}" "${run_arg}"; then
        kill_proc "${run_name}" "${run_arg}" "${run_pid}"
        (( kill_run += 1 ))
        break
      fi
    done
  done

  ## Remove the temporary snapshot file.
  rm -f -- "${TMP_FILE}"
  echo "========================================="
  echo " Killed ${kill_run} program(s)."
  echo " "
  if (( kill_run != 0 )); then
    echo " [ Current Programs Running Information ]"
    echo "-----------------------------------------"
    ps -ef | grep -- "${PROC_KEY}" | grep -v grep | awk '{print $2,$8,$9}'
  fi
  echo " "
  echo " ${PROC_MONITOR} Exit."
}
#############################################
#
# Clean up: delete the contents of the log/ and temp/ directories, all
# *.log files and nohup.out inside PROC_BIN.
# Globals:   PROC_BIN (read; becomes the working directory)
# Returns:   1 when PROC_BIN is empty or cannot be entered (nothing is
#            deleted), otherwise the status of the last rm
#
#############################################
clean_files() {
  # The rm commands below use relative paths; never run them unless the
  # working directory really is PROC_BIN.
  if [[ -z "${PROC_BIN}" ]] || ! cd "${PROC_BIN}"; then
    echo " Clean aborted: cannot enter directory '${PROC_BIN}'" >&2
    return 1
  fi
  echo " Clean Dir: log temp"
  rm -rf -- log/* temp/*
  echo " Clean log files"
  rm -f -- *.log
  rm -f -- nohup.out
}
#############################################
#
# Kill the other running instances ("siblings") of this script, leaving the
# current process and its own children alone.
# A sibling is any `ps -ef` line that contains SELF_NAME, does not contain
# "grep", and whose PID and PPID columns both differ from SELF_PID.
# Globals:   SELF_NAME, SELF_PID (read), MONITOR_LOG (appended)
# Returns:   0
#
#############################################
stop_script() {
  local snapshot sibling_pids proc_id
  # Take the process list once, before any filter process is spawned.
  snapshot=$(ps -ef)
  # Compare the PID/PPID columns exactly; a substring filter on the whole
  # line would also drop siblings whose PID merely contains SELF_PID.
  sibling_pids=$(awk -v name="${SELF_NAME}" -v self="${SELF_PID}" '
    index($0, name) && !index($0, "grep") && $2 != self && $3 != self { print $2 }
  ' <<< "${snapshot}")
  if [[ -z "${sibling_pids}" ]]; then
    return 0
  fi
  while read -r proc_id; do
    echo " -- kill ${proc_id} script process"
    echo " -- kill ${proc_id} script process" >> "${MONITOR_LOG}"
    kill "${proc_id}"
  done <<< "${sibling_pids}"
  return 0
}
#############################################
#
# Check whether another instance ("sibling") of this script is running.
# A sibling is any `ps -ef` line that contains SELF_NAME, does not contain
# "grep", and whose PID and PPID columns both differ from SELF_PID.
# Globals:   SELF_NAME, SELF_PID (read)
# Returns:   1 when a sibling exists, 0 otherwise
#            (note: inverted with respect to the usual shell convention;
#            callers test for "== 1").
#
#############################################
check_script_is_running() {
  local snapshot pids
  # Take the process list once, before any filter process is spawned.
  snapshot=$(ps -ef)
  # Compare the PID/PPID columns exactly; a substring filter on the whole
  # line would hide siblings whose PID merely contains SELF_PID.
  pids=$(awk -v name="${SELF_NAME}" -v self="${SELF_PID}" '
    index($0, name) && !index($0, "grep") && $2 != self && $3 != self { print $2 }
  ' <<< "${snapshot}")
  if [[ -z "${pids}" ]]; then
    return 0
  fi
  return 1
}
###########################################
############# Main flow ###################
###########################################
# Print the banner, then dispatch on the first command-line parameter:
#   (none) | check  - run one monitor pass
#   monitor         - loop monitor passes unless another instance is running
#   start           - like monitor, but install the crontab job first
#   stop            - remove the crontab job, kill sibling scripts, then
#                     kill the configured programs
#   clean           - remove log and temporary files
#   help            - show usage
show_version
case "${MONITOR_PARAM}" in
  '' | check)
    monitor_programs
    ;;
  monitor | start)
    ## Only one looping monitor may run: bail out if a sibling exists.
    check_script_is_running
    # check_script_is_running returns 1 when a sibling exists.
    if [[ $? == 1 ]]; then
      echo " ${PROC_MONITOR} is running."
      echo " "
      ps -ef | grep "${SELF_NAME}" | grep -v grep | grep -v "${SELF_PID}"
      echo " "
    else
      if [[ "${MONITOR_PARAM}" == "start" ]]; then
        init_monitor_config
      fi
      # Endless monitor loop; the report of each pass is discarded.
      while true; do
        monitor_programs >> /dev/null
        sleep "${SLEEP_VAL}"
      done
    fi
    ;;
  stop)
    ## Delete the user crontab job.
    clear_monitor_config
    ## Kill sibling script processes, sparing this one.
    stop_script
    ## Kill the configured programs.
    stop_programs
    ;;
  clean)
    clean_files
    ;;
  help)
    show_help
    ;;
  *)
    echo " Parameter Error: ${MONITOR_PARAM}"
    # Show how to invoke the script, not the rejected parameter.
    echo " please usage: $0 help"
    ;;
esac
echo " "
###########################################
# bash: monitoring running programs
# (web-page residue) Latest recommended article published on 2022-06-05 09:49:22