1、定时启动worker进程
#!/bin/bash
# Starts one more transfer worker when at most two are currently running.
# Exit status: 122 when enough workers are already running.
start_path=/root/start_odps3.sh

# Count the `ps -ef` lines that mention "transfer" (the grep helper itself is
# filtered out first).
worker_num=$(ps -ef | grep -v grep | grep -c transfer)

# Numeric comparison. The previous `[[ $worker_num <=2 ]]` was parsed as the
# string comparison `$worker_num < "=2"`, so the threshold was never checked.
# `[ ... -le ... ]` also works when the script is launched through plain `sh`.
if [ "$worker_num" -le 2 ]; then
  "$start_path"
  sleep 30
  # NOTE: the count printed here was taken before the start script ran.
  echo "已经起了<---------${worker_num}个---------------->进程"
else
  echo "已经起了<---------${worker_num}个---------------->进程"
  exit 122
fi
2、定时检测日志不更新的worker进程,并查杀
#!/bin/bash
# Watchdog for transfer workers: takes two "name size" snapshots of the
# transfer logs ten minutes apart so that logs which stopped growing can be
# detected by comparing start.txt with stop.txt.
start_path=/root/start_odps3.sh
host=$(hostname)
# Stored as a literal pattern; it is expanded each time it is used unquoted.
log_path=/mnt/transfer_*.log

# First snapshot. The output goes straight to the file; the old
# start_size=`... > start.txt` wrapper always captured an empty string.
# shellcheck disable=SC2086 -- the glob must expand here
stat -c "%n %s" ${log_path} > start.txt
sleep 600
# Second snapshot, taken 600 seconds later.
# shellcheck disable=SC2086 -- the glob must expand here
stat -c "%n %s" ${log_path} > stop.txt
######################## [Pre-run initialization] ########################
#######################################
# Pre-run cleanup: reconciles the files in the log directory with the
# processes that are currently running.
#   1. Deletes regular files selected by `find -mtime +1`.
#   2. Writes every port found as "port=<digits> " in `ps -ef` output to ./pro
#      and every 890x number found in the directory's file names to ./log.
#   3. Removes files whose port is in ./log but not in ./pro.
#   4. Kills processes whose port is in ./pro but not in ./log.
# Arguments: $1 - directory holding the logs (optional, defaults to /mnt)
# Outputs:   one progress line on stdout; rewrites ./pro and ./log in the
#            current working directory.
# NOTE(review): step 2 collects ports from every process whose command line
# contains "port=<digits> ", not only transfer workers, and step 4 kills all of
# them when no matching file exists - confirm nothing unrelated can match.
#######################################
ini() {
  local mnt_dir=${1:-/mnt}
  local entry port pid

  echo "初始化${mnt_dir}目录文件"
  find "$mnt_dir" -type f -mtime +1 -exec rm -f -- {} +

  # Ports of running processes: digits between "port=" and the next space.
  ps -ef | grep -v grep | grep -Eo 'port=[0-9]+ ' | grep -Eo '[0-9]+' > pro

  # Ports that appear in file names; a glob replaces the former `ls` parsing.
  for entry in "$mnt_dir"/*; do
    [ -e "$entry" ] || continue
    printf '%s\n' "${entry##*/}"
  done | grep -Eo '890[0-9]' > log

  # Files without a running process: remove every file naming that port.
  for port in $(grep -vwf pro log); do
    rm -f -- "$mnt_dir"/*"$port"*
  done

  # Processes without a file: terminate them. The pattern ends at a non-digit
  # so that port 890 can no longer select a process listening on 8901.
  for port in $(grep -vwf log pro); do
    for pid in $(ps -ef | grep -v grep \
      | grep -E -- "port=${port}([^0-9]|\$)" | awk '{print $2}'); do
      kill "$pid"
    done
  done
}
# Run the cleanup once. Note that this happens after the 600-second sampling
# window above and before the stalled-log check at the end of the script.
ini
#######################################
# Starts one more worker through $start_path when fewer than six
# 'max-compute-transfer' processes are running.
# Globals:  start_path (read) - command that launches a worker
# Outputs:  status messages on stdout
# Returns:  exits the whole script with status 122 when six or more workers
#           are already running.
#######################################
worker() {
  local worker_num
  worker_num=$(ps -ef | grep 'max-compute-transfer' | grep -v grep | wc -l)

  # Numeric comparison: `[[ $worker_num < 6 ]]` compared strings, so a count
  # of "10" was treated as smaller than "6".
  if [ "$worker_num" -lt 6 ]; then
    # The original ran the misspelled, empty "$start_patho", so nothing was
    # ever started even though success was reported.
    if "$start_path"; then
      echo " 进程启动成功"
      sleep 30
      # NOTE: the count printed here was taken before the start command ran.
      echo "已经起了<---------${worker_num}个---------------->进程"
    fi
  else
    echo "已经起了<---------${worker_num}个---------------->进程"
    exit 122
  fi
}
#######################################
# Kills the processes whose log did not change between the two snapshots and
# removes that log. A line of stop.txt counts as stalled when it is matched by
# a line of start.txt (same "name size" pair).
# Outputs: appends three lines per killed process to ./error.txt
#######################################
k() {
  local stalled log_file port pid now

  # Handle each stalled log on its own. Previously all ports were joined into
  # one multi-line grep pattern, which could match unrelated processes.
  grep -wf start.txt stop.txt | while IFS= read -r stalled; do
    [ -n "$stalled" ] || continue

    # Snapshot lines are "<name> <size>": drop the trailing size field.
    log_file=${stalled% *}

    # Read the port from the file name only, so digits that happen to occur
    # in the size column are never mistaken for a port.
    port=$(printf '%s\n' "${log_file##*/}" | grep -Eo '89[0-9]{2}' | head -n 1)
    [ -n "$port" ] || continue

    for pid in $(ps -ef | grep -v grep \
      | grep -E -- "port=${port}([^0-9]|\$)" | awk '{print $2}'); do
      # ${time} and ${date} were never set; use the current time instead.
      now=$(date)
      rm -f -- "$log_file"
      echo "${now}---transfer日志文件---${pid}---停止增长!!" >> error.txt
      echo "[transfer-worker_${pid}]进程异常即将重启进程---${now}" >> error.txt
      kill "$pid" && echo "${pid}--进程被杀死!!" >> error.txt
    done
  done
  # worker   (restart call left disabled, as in the original)
}
# Collect the stop.txt lines that are matched by a line of start.txt, i.e.
# snapshot entries that look the same in both files.
error_transfer=$(grep -wf start.txt stop.txt)

if [[ -n $error_transfer ]]; then
  # At least one entry did not change: hand over to the kill routine.
  k
else
  # Nothing matched: record a heartbeat with the current date.
  echo "程序运行正常---$(date)" >> ok.txt
fi
3、定时crontab配置
cat /var/spool/cron/root
# Every minute: run the worker start script (presumably script 1 of this
# note - confirm the path). Launched with bash because the scripts declare
# #!/bin/bash and use [[ ]], which a non-bash `sh` does not support.
* * * * * bash /root/worker.sh
# Every 12 minutes: run the stalled-log watchdog (presumably script 2 of this
# note - confirm the path).
*/12 * * * * bash /root/worker_kill.sh