#!/bin/sh
#本脚本为守护进程, 如果发现进程僵死时, 自动重启进程
# Restart a worker script when today's log file has stopped being updated.
#
# Uses POSIX sh syntax only, because this script runs under #!/bin/sh
# (no `function` keyword, `[[ ]]`, `$[ ]` or arrays). For the same reason the
# working variables are plain globals rather than `local`.
#
# Arguments:
#   $1 - log path prefix; the file examined is "<prefix><YYYY-MM-DD>.log"
#        for today's date
#   $2 - worker script file name; used as the (case-insensitive) pattern to
#        find running instances and as the file to launch
#   $3 - maximum allowed age, in seconds, of the log's last modification
#   $4 - optional interpreter (default: /usr/local/bin/python27)
#   $5 - optional directory containing $2
#        (default: /var/disk/web/gsm_project/score)
# Outputs: progress messages on stdout, usage errors on stderr.
# Returns: 0 when nothing had to be done or the worker was started,
#          1 when the log could not be inspected or the new worker is not
#            alive shortly after launch,
#          2 on missing arguments.
check_and_kill_and_start() {
  if [ "$#" -lt 3 ]; then
    echo 'usage: check_and_kill_and_start LOG_PREFIX PS_NAME DEAD_SECONDS [INTERPRETER [SCRIPT_DIR]]' >&2
    return 2
  fi

  log_file=$1
  ps_name=$2
  dead_duration=$3
  interpreter=${4:-/usr/local/bin/python27}
  script_dir=${5:-/var/disk/web/gsm_project/score}

  log_file=${log_file}$(date +%Y-%m-%d).log

  # Without today's log there is nothing to measure staleness against.
  # `return`, not `exit`, so the caller can go on to check other workers.
  if [ ! -e "$log_file" ]; then
    return 0
  fi

  # stat %Y = last modification time, seconds since the epoch.
  last_modify=$(stat -c %Y "$log_file") || return 1
  now=$(date +%s)

  # Log written recently enough: the worker is considered healthy.
  if [ "$((now - last_modify))" -le "$dead_duration" ]; then
    return 0
  fi

  printf '%s\n' "=========================$(date +'%Y-%m-%d %H:%M:%S')====================================="
  printf '%s\n' "[$log_file] last update time [$(date -d "@$last_modify" '+%Y-%m-%d %H:%M:%S')], need restart progress [$ps_name]"

  # PIDs of every process whose command line mentions the worker script,
  # excluding this watchdog itself and its own children.
  proc_id=$(ps -ef \
    | grep -i -- "$ps_name" \
    | grep -v "grep" \
    | awk -v self="$$" '$2 != self && $3 != self {print $2}')

  if [ -n "$proc_id" ]; then
    echo 'old progress exist, kill it first'
    # Intentionally unquoted: one PID per whitespace-separated word.
    for id in $proc_id; do
      if kill -TERM "$id"; then
        echo "kill the progress sucessfull"
      else
        echo "kill the progress failed "
      fi
    done
    echo 'sleep l0s then start new progress'
  fi

  # Give the old instances time to shut down before launching a new one.
  sleep 10

  "$interpreter" "$script_dir/$ps_name" &
  new_pid=$!

  # `$?` after `&` is always 0, so it says nothing about the launch.
  # Instead, wait a moment and probe whether the child is still alive.
  sleep 1
  if kill -0 "$new_pid" 2>/dev/null; then
    echo "the new progress id is $new_pid"
    return 0
  fi

  echo "progress start failed, the cron will try to start it next time "
  return 1
}
#echo '========================='$(date +"%Y-%m-%d %H:%M:%S")'====================================='
# Run the check. Arguments, in order: log file path prefix, worker script
# file name, staleness threshold in seconds.
check_and_kill_and_start \
  '/var/disk/logs/score_cache/server_gsm_score' \
  'score_cache_match_5se.py' \
  '30'
Linux 简单shell创建自己的守护进程,自动重启,监控进程运行
对于守护进程管理、自动重启、记录log,有一个很好用的进程管理工具 supervisord。它简单易用,功能强大。但是对我的部署需求来说还是过于繁琐,而且我没有弄清楚如何用它记录进程状态。
今天写了一个简单的shell脚本,以满足我的所有需求,并且部署简单。
希望能够给有同样需求的码农们提供一个参考。脚本 pgmctl.sh 的内容如下:
#!/bin/bash
# Settings shared by the start/stop functions and the supervisor loop.
# All paths are relative to the directory the script is started from.
CMD='./mypgm'          # command used to launch the supervised program
PID='./log/PID.txt'    # file holding the PID of the running instance
LOG='./log/mypgm.log'  # receives program output and supervisor messages
DEBUG='false'          # passed to the program as -debug=<value>
# ---------------------------------------------------
# Launch the program in the background and record its PID.
#
# Globals:  CMD (read)   - command to run; may include leading arguments
#           DEBUG (read) - passed as -debug=<value>
#           LOG (read)   - stdout and stderr of the program are appended here
#           PID (read)   - file that receives the new process id
#           mypgmpid (written) - PID of the launched process
# Outputs:  "start [ok]" on stdout, or a failure message on stderr
# Returns:  0 on success, 1 if the log or PID directory cannot be created
start() {
  local dir

  # Without these directories both the log redirection and the PID write
  # fail, leaving nothing running and nothing recorded.
  for dir in "$(dirname -- "$LOG")" "$(dirname -- "$PID")"; do
    if ! mkdir -p -- "$dir"; then
      echo "start [failed: cannot create $dir]" >&2
      return 1
    fi
  done

  # CMD is deliberately unquoted so it may carry its own arguments.
  # shellcheck disable=SC2086
  $CMD -debug="$DEBUG" server.ini >> "$LOG" 2>&1 &
  mypgmpid=$!
  echo "$mypgmpid" > "$PID"
  echo "start [ok]"
}
# Stop the program whose PID is recorded in the PID file.
#
# Globals:  PID (read) - path of the PID file; the file is removed
# Outputs:  "stop [ok]" on stdout, diagnostics on stderr
# Returns:  0 when the PID file was consumed, 1 when it is missing or does
#           not contain a numeric PID
stop() {
  local pid

  if [[ ! -f "$PID" ]]; then
    echo "stop [not running: $PID not found]" >&2
    return 1
  fi

  pid=$(<"$PID")

  # Remove the PID file before signalling. The supervisor loop restarts the
  # program when it sees a PID file whose process is gone, so the file must
  # disappear first.
  rm -f -- "$PID"

  if [[ ! "$pid" =~ ^[0-9]+$ ]]; then
    echo "stop [invalid pid in $PID]" >&2
    return 1
  fi

  kill "$pid" 2>/dev/null \
    || echo "stop: process $pid was not running" >&2
  echo "stop [ok]"
}
# --------------------------------------------------
# Dispatch on the sub-command given as the first argument. Every arm except
# `stop` falls through to the code that follows this statement.
echo "$CMD $1"
case "$1" in
  start)
    start
    ;;
  start_debug)
    DEBUG="true"
    start
    ;;
  restart)
    # Only stop when a PID file shows an instance was started.
    if [[ -f "$PID" ]]; then
      stop
      sleep 4
    fi
    start
    ;;
  stop)
    stop
    exit 0
    ;;
  *)
    # A missing or mistyped sub-command must not fall through and attach a
    # second supervisor to an instance that is already running.
    echo "usage: $0 {start|start_debug|restart|stop}" >&2
    exit 2
    ;;
esac
# Supervisor loop: every 3 seconds check the process recorded in the PID
# file. Restart it when it is gone, and append its `ps` line to the log on
# every 20th healthy check. The loop ends once the PID file no longer exists.
for (( c = 0; ; c++ )); do
  [[ -f "$PID" ]] || break

  mypgmpid=$(cat -- "$PID" 2>/dev/null)
  psrtn=
  if [[ "$mypgmpid" =~ ^[0-9]+$ ]]; then
    # `h` suppresses the header, so output is empty when the PID is gone.
    psrtn=$(ps uh -p"$mypgmpid")
  fi

  if [[ -z "$psrtn" ]]; then
    # The PID file may have been removed since the check above (a stop in
    # progress); in that case do not restart.
    [[ -f "$PID" ]] || break
    echo "$(date '+%Y/%m/%d %H:%M:%S') FATALERROR RESTART SERVICE" >> "$LOG"
    start
  elif (( c % 20 == 0 )); then
    echo "$(date '+%Y/%m/%d %H:%M:%S') PSINFO $psrtn" >> "$LOG"
    c=0
  fi
  sleep 3
done
启动:./pgmctl.sh start &
debug 启动 : ./pgmctl.sh start_debug &
重启:./pgmctl.sh restart &
停止:./pgmctl.sh stop
此脚本只是示例,提供一个思路。到你真正使用的时候,需要按照自己的需求修改。