bash 监控程序运行

#!/bin/bash
#############################################
# Define Variables
#############################################
## Display name and version of this monitor script
PROC_MONITOR="Program Server Monitor"
MONITOR_VER="(1.4)"

## Program locations
PROC_HOME="/home/oracle/project"                ## project root directory
PROC_BIN="${PROC_HOME}/bin"                     ## directory holding the programs

## Facts about the current invocation
SELF_PID="$$"                                   ## PID of this running script
SELF_NAME="proc_monitor.sh"                     ## file name of this script
MONITOR_PARAM="$1"                              ## first command-line parameter
MONITOR_LOG="${PROC_BIN}/proc_monitor.log"      ## log file written by the monitor

## Monitoring settings
PROC_KEY="Server"                               ## keyword used to find programs in ps output
TMP_FILE="${PROC_BIN}/${PROC_KEY}.ps"           ## scratch file for ps query results
PROC_CFG="${PROC_BIN}/proc_config.cfg"          ## programs to check (program-name arg1 ...)
CRON_CFG="${PROC_BIN}/proc_cron.cfg"            ## crontab definition installed by "start"
SLEEP_VAL="3"                                   ## seconds between two monitoring passes

## Load the runtime environment for the programs
## (per the original note, this sets LD_LIBRARY_PATH)
source "${PROC_BIN}/proc.bashrc"

#############################################
# Define Functions
#############################################
# Print the banner: monitor name/version, current time, PID and parameter
#############################################
function show_version
{
    local rule="========================================="
    local now
    now=$(date +"%Y-%m-%d %T")

    printf '%s\n' " " "${rule}" "    ${PROC_MONITOR} ${MONITOR_VER}" "${rule}" " "

    # The values below are intentionally left unquoted: echo joins the
    # label and the value words with single spaces, and an empty value
    # adds nothing after the label.
    echo " Current Time : " ${now}
    echo " Process ID   : " ${SELF_PID}
    echo " Parameters   : " ${MONITOR_PARAM}

    printf '%s\n' "${rule}"
}
#############################################
#
#  Start a user program in the background, detached with nohup.
#  Arguments: $1 - program name, $2... - its arguments
#  Globals:   PROC_BIN (read), MONITOR_LOG (appended)
#  Returns:   1 when no program is given or PROC_BIN cannot be entered
#
#############################################
function run_proc
{
    if (( $# == 0 )); then
        echo "   Run: no program given" >&2
        return 1
    fi

    echo "   Run: $*"

    # Programs are started from PROC_BIN; refuse to launch from an
    # unexpected directory when it cannot be entered.
    if ! cd "${PROC_BIN}"; then
        echo "   Run: cannot enter ${PROC_BIN}" >&2
        return 1
    fi

    #touch $PROC_BIN/$1.log
    echo "$(date +'%Y-%m-%d %T') exec nohup $* " >> "${MONITOR_LOG}"

    # Forward every argument, not only the first one, so the started
    # command line matches the full config entry.
    nohup "$@" > /dev/null 2>&1 &
}
#############################################
#
# Compare two string pairs literally.
# Arguments: $1 $2 - first pair, $3 $4 - second pair
# Returns:   0 when $1 equals $3 and $2 equals $4, otherwise 1
#
#############################################
function comp_str
{
    #echo ":: comp_str: $1 != $3 || $2 != $4 "
    # The right-hand sides are quoted so they are compared as plain
    # strings; unquoted they would be treated as glob patterns.
    if [[ "$1" == "$3" && "$2" == "$4" ]]; then
        return 0
    fi
    return 1
}
#############################################
#
#  Check whether a program shows up in the process list.
#  Arguments: program name followed by its arguments; they are joined
#             with spaces and searched for in the output of "ps -ef"
#  Returns:   1 when a matching process exists, otherwise 0
#             (note: inverted compared with the usual shell convention)
#
#############################################
function check_proc_is_running
{
    local matches

    # -F matches the command line literally (no regex surprises from
    # '.', '*' ...); -- keeps a leading '-' from being read as an option.
    matches=$(ps -ef | grep -F -- "$*" | grep -v grep)

    if [[ -z "${matches}" ]]; then
        return 0
    fi
    return 1
}
#############################################
#
# Print the usage text listing every supported parameter
#
#############################################
function show_help
{
    local -a usage_lines=(
        " "
        " [ Usage ]:"
        "   start   - initialize crontab job, and run Program programs"
        "   monitor - monitor(check | run) all Program programs"
        "   check   - check all Program programs, [default]"
        "   stop    - stop all Program programs"
        "   clean   - clean all Program log files and so on"
        "   help    - show help"
        " "
    )

    printf '%s\n' "${usage_lines[@]}"
}
#############################################
#
# Monitor pass: check every program listed in the config file and
# start the ones that are not running.
# Globals:  PROC_KEY, PROC_CFG, PROC_MONITOR (read)
# Calls:    check_proc_is_running, run_proc
# Returns:  1 when the config file cannot be read
#
#############################################
function monitor_programs
{
    local -a conf_proc=()      # one config line per element
    local -a cmd=()            # a config line split into words
    local conf_num=0
    local new_run=0
    local i procname

    ## Show what ps currently reports for the program keyword
    echo " [ Current Programs Running Information ]"
    echo "-----------------------------------------"
    ps -ef | grep "${PROC_KEY}" | grep -v grep | awk '{print $8,$9}'
    echo " "

    ###########
    echo " [ Program Config File ] "
    echo "-----------------------------------------"
    if [[ ! -r "${PROC_CFG}" ]]; then
        echo " Cannot read config file: ${PROC_CFG}" >&2
        return 1
    fi

    # "|| [[ -n ... ]]" keeps a final line that has no trailing newline;
    # -r keeps backslashes intact. Blank lines are skipped.
    while read -r procname || [[ -n "${procname}" ]]
    do
        if [[ -z "${procname}" ]]; then
            continue
        fi

        conf_proc+=("${procname}")
        echo " ${#conf_proc[@]} ) ${procname}"
    done < "${PROC_CFG}"

    conf_num=${#conf_proc[@]}
    echo "There are ${conf_num} programs in ${PROC_CFG} file."
    echo " "

    echo " [ checking programs information ]"
    echo "-----------------------------------------"

    ## Main loop
    for (( i = 0; i < conf_num; i++ ))
    do
        # Split the entry into program name + arguments without
        # exposing it to pathname expansion.
        read -r -a cmd <<< "${conf_proc[i]}"

        check_proc_is_running "${cmd[@]}"

        # check_proc_is_running returns 1 when the process exists
        if [[ $? == 1 ]]; then
            echo " $(( i + 1 )) ) : ${conf_proc[i]} is OK."
        else
            ######## process missing or abnormal: start it again
            echo "  $(( i + 1 )) ) : ${conf_proc[i]} not exist or exception, will restart run..."
            run_proc "${cmd[@]}"
            (( new_run += 1 ))
        fi
    done

    echo "========================================="
    echo " New run ${new_run} program(s)."
    echo " "

    if [[ ${new_run} -ne 0 ]]; then
        # Pause briefly before listing the processes again
        sleep 3
        echo " [ Current Programs Running Information ]"
        echo "-----------------------------------------"
        ps -ef | grep "${PROC_KEY}" | grep -v grep | awk '{print $2,$8,$9}'
    fi

    echo " "
    echo " ${PROC_MONITOR} Exit."
}
#############################################
#
# Initialization: install the user crontab from CRON_CFG and list it
#
#############################################
function init_monitor_config
{
    printf '%s\n' " Initialize monitor config -- create user crontab job"

    # Load the crontab definition, then print what is now installed
    crontab "${CRON_CFG}"
    crontab -l

    printf '%s\n' " "
}
 #############################################
#
# Remove the user's crontab (crontab -r)
#
#############################################
function clear_monitor_config
{
    printf '%s\n' " clear monitor config -- delete user crontab job"

    # Drops the whole crontab of the current user
    crontab -r

    printf '%s\n' " "
}
#############################################
#
#  Kill one program instance and record it in the monitor log.
#  Arguments: $1 - program name, $2 - its first argument, $3 - PID
#  Globals:   MONITOR_LOG (appended)
#  Returns:   1 when $3 is not a numeric PID, otherwise the status of kill
#
#############################################
function kill_proc
{
    local name=$1
    local arg=$2
    local pid=$3

    # Without a numeric PID there is nothing sensible to signal; do not
    # log a kill that never happened.
    if [[ ! "${pid}" =~ ^[0-9]+$ ]]; then
        echo "-- skip kill: invalid pid '${pid}' for ${name} ${arg}" >&2
        return 1
    fi

    echo  "-- kill ${pid} ${name} ${arg}"
    echo  "$(date +'%Y-%m-%d %T') kill ${pid} ${name} ${arg} " >> "${MONITOR_LOG}"
    kill "${pid}"
}
#############################################
#
# Stop programs: for every entry in the config file, kill the first
# running process whose command name and first argument match it.
# Globals:  PROC_KEY, PROC_CFG, PROC_MONITOR (read)
# Calls:    comp_str, kill_proc
# Returns:  1 when the config file cannot be read
#
#############################################
function stop_programs
{
    local -a conf_proc=()      # config lines: "name [arg ...]"
    local -a isrun_proc=()     # ps snapshot lines: "pid name [arg]"
    local conf_num=0
    local isrun_num=0
    local kill_run=0
    local i y line procname
    local conf_name conf_arg extra_args
    local run_pid run_name run_arg

    echo " [ Current Programs Running Information ]"
    echo "-----------------------------------------"

    ## Snapshot the matching processes. The PID is emitted first so a
    ## process started without arguments still parses unambiguously.
    while read -r line
    do
        isrun_proc+=("${line}")
        read -r run_pid run_name run_arg <<< "${line}"
        echo " ${#isrun_proc[@]} ) ${run_name}${run_arg:+ ${run_arg}} ${run_pid}"
    done < <(ps -ef | grep "${PROC_KEY}" | grep -v grep | awk '{print $2,$8,$9}')

    isrun_num=${#isrun_proc[@]}
    echo "There are ${isrun_num} programs."
    echo " "

    ###########
    echo " [ Program Config File ] "
    echo "-----------------------------------------"
    if [[ ! -r "${PROC_CFG}" ]]; then
        echo " Cannot read config file: ${PROC_CFG}" >&2
        return 1
    fi

    # Keep a final line without trailing newline; skip blank lines.
    while read -r procname || [[ -n "${procname}" ]]
    do
        if [[ -z "${procname}" ]]; then
            continue
        fi

        conf_proc+=("${procname}")
        echo " ${#conf_proc[@]} ) ${procname}"
    done < "${PROC_CFG}"

    conf_num=${#conf_proc[@]}
    echo "There are ${conf_num} programs in ${PROC_CFG} file."
    echo " "

    echo " [ Kill Program Server Programs ]"

    ## Main loop
    for (( i = 0; i < conf_num; i++ ))
    do
        # Only the program name and its first argument take part in the
        # comparison, because that is all the ps snapshot carries.
        read -r conf_name conf_arg extra_args <<< "${conf_proc[i]}"

        for (( y = 0; y < isrun_num; y++ ))
        do
            read -r run_pid run_name run_arg <<< "${isrun_proc[y]}"

            if comp_str "${conf_name}" "${conf_arg}" "${run_name}" "${run_arg}"; then
                # Count only kills that were actually issued successfully
                if kill_proc "${run_name}" "${run_arg}" "${run_pid}"; then
                    (( kill_run += 1 ))
                fi
                # One process per config entry, as before
                break
            fi
        done
    done

    echo "========================================="
    echo " Killed ${kill_run} program(s)."
    echo " "

    if [[ ${kill_run} -ne 0 ]]; then
        echo " [ Current Programs Running Information ]"
        echo "-----------------------------------------"
        ps -ef | grep "${PROC_KEY}" | grep -v grep | awk '{print $2,$8,$9}'
    fi

    echo " "
    echo " ${PROC_MONITOR} Exit."
}
#############################################
#
# Clean up: empty the log/ and temp/ directories under PROC_BIN and
# delete *.log and nohup.out there.
# Globals:  PROC_BIN (read)
# Returns:  1 when PROC_BIN is empty or cannot be entered (nothing is
#           deleted in that case)
#
#############################################
function clean_files
{
    # All deletions below use relative paths, so they must never run
    # unless the working directory really is PROC_BIN.
    if [[ -z "${PROC_BIN}" ]] || ! cd "${PROC_BIN}"; then
        echo " Clean aborted: cannot enter '${PROC_BIN}'" >&2
        return 1
    fi

    echo " Clean Dir: log temp"
    rm -rf -- log/* temp/*

    echo " Clean log files"
    rm -f -- *.log

    rm -f -- nohup.out
}
#############################################
#
# Kill the other running instances of this script, sparing the current
# process and its own child processes.
# Globals:  SELF_NAME, SELF_PID (read), MONITOR_LOG (appended)
#
#############################################
function stop_script
{
    local proc_id

    # Compare the PID and PPID columns exactly. A plain text filter on
    # SELF_PID would also hide unrelated processes whose line merely
    # contains the same digits (e.g. pid 1234 when SELF_PID is 123).
    # Children of this script (PPID == SELF_PID) are excluded because
    # the pipeline's own subshell shows up under the script name.
    while read -r proc_id
    do
        echo " -- kill ${proc_id} script process"
        echo " -- kill ${proc_id} script process" >> "${MONITOR_LOG}"
        kill "${proc_id}"
    done < <(ps -ef | grep -F -- "${SELF_NAME}" | grep -v grep \
                | awk -v self="${SELF_PID}" '$2 != self && $3 != self {print $2}')
}
#############################################
#
# Check whether another instance of this script is running.
# Globals:  SELF_NAME, SELF_PID (read)
# Returns:  1 when another instance exists, otherwise 0
#           (note: inverted compared with the usual shell convention)
#
#############################################
function check_script_is_running
{
    local pids

    # Exact PID/PPID comparison: skip this process and its own children
    # (the command substitution subshell carries the script name too),
    # without hiding other instances whose PID merely contains the same
    # digits as SELF_PID.
    pids=$(ps -ef | grep -F -- "${SELF_NAME}" | grep -v grep \
            | awk -v self="${SELF_PID}" '$2 != self && $3 != self {print $2}')

    if [[ -z "${pids}" ]]; then
        return 0
    fi
    return 1
}
###########################################
############# 主处理流程 ##################
###########################################
    show_version

    # Dispatch on the first command-line parameter; no parameter means
    # the same as "check".
    case "${MONITOR_PARAM}" in
        ""|check)
            monitor_programs
            ;;
        monitor|start)
            ## Only one monitoring loop may run: report and leave when
            ## another instance exists, otherwise loop forever.
            check_script_is_running
            if [[ $? == 1 ]]; then
                echo " ${PROC_MONITOR} is running."
                echo " "
                ps -ef | grep "${SELF_NAME}" | grep -v grep | grep -v "${SELF_PID}"
                echo " "
            else
                # "start" additionally installs the crontab job first
                if [[ "${MONITOR_PARAM}" == "start" ]]; then
                    init_monitor_config
                fi

                while true
                do
                    monitor_programs >> /dev/null
                    sleep "${SLEEP_VAL}"
                done
            fi
            ;;
        stop)
            ## delete user crontab job
            clear_monitor_config

            ## kill the other instances of this script (not this process)
            stop_script

            ## kill the monitored programs
            stop_programs
            ;;
        clean)
            clean_files
            ;;
        help)
            show_help
            ;;
        *)
            echo " Parameter Error: ${MONITOR_PARAM}"
            # Point the user at the script itself, not at the bad parameter
            echo " please usage: $0 help"
            ;;
    esac

    echo " "
###########################################

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值