Flink 版本:1.12.4(注意:此行位于 shebang 之前;若要直接执行脚本,需删除此行或将其改为 # 注释,否则 #!/bin/bash 不生效)
#!/bin/bash
# Stop a running Flink job with a savepoint and restart it from that
# savepoint on YARN. Written against Flink 1.12.4.
#
# Flow:
#   1. If a previous start log exists, extract the JobID from it and the
#      YARN application id (matched by job name) from `yarn application -list`.
#   2. Stop the job with a savepoint — without the application id the stop
#      may fail, so both ids are passed.
#   3. Stopping can take a while, so wait 10s, then kill the YARN app if it
#      is still RUNNING.
#   4. Parse the savepoint path from the stop log (empty on the very first
#      start) and resubmit the job, restoring from the savepoint if present.
#
# Required configuration (fill in before use):
job_name=
start_log=
stop_log=
# Savepoint target directory; Flink creates a random subdirectory under it
# (e.g. .../savepoint-a68877-43ad4f697e11).
savepoint_dir=hdfs://emr-cluster/flink/savepoints

# If the job was started before, recover JobID + YARN application id.
if [ -f "$start_log" ]; then
  jobid=$(grep 'Job has been submitted with JobID' "$start_log" | awk '{print $NF}')
  yid=$(yarn application -list | grep "${job_name}" | awk '{print $1}')
  # NOTE(review): rest.port=8083 was needed in the original environment to
  # avoid odd errors; it may not be required everywhere — verify locally.
  "${FLINKHOME}/bin/flink" stop -p "$savepoint_dir" -D rest.port=8083 -d -yid "$yid" "$jobid" > "$stop_log" 2>&1
  # Stopping may take a while; give it time before re-checking YARN.
  sleep 10
  # If the YARN application is still RUNNING, kill it explicitly.
  yid=$(yarn application -list -appStates RUNNING | awk '{print $1}' | grep "$yid")
  if [ -n "$yid" ]; then
    yarn application -kill "$yid"
  fi
fi

# Read the savepoint path from the stop log; empty on the first start.
# An array keeps the optional "-s <path>" pair safely quoted when expanded.
savepoint_args=()
if [ -f "$stop_log" ]; then
  savepointpath=$(grep 'Savepoint completed' "$stop_log" | awk '{print $NF}')
  if [ -n "$savepointpath" ]; then
    savepoint_args=(-s "$savepointpath")
  fi
fi

# Submit the job, restoring from the savepoint when one was taken.
# BUG FIX: the original line read `... -d service-1.0.jar $start_log 2>&1`,
# which passed the log file as a program argument instead of redirecting
# stdout into it — the start log was never written.
"${FLINKHOME}/bin/flink" run "${savepoint_args[@]}" -m yarn-cluster -ynm "${job_name}" -yD rest.port=8083 -c com.TestJob -d service-1.0.jar > "$start_log" 2>&1
savepointpath 示例(stop 成功后从 stop_log 中解析得到):
hdfs://emr-cluster/flink/savepoints/savepoint-a68877-43ad4f697e11
验证:重启后通过 yarn application -list 或 Flink Web UI 确认任务处于 RUNNING 状态,并确认已从上述 savepoint 路径恢复。
更好的思路:
把savepoint路径和任务信息同时保存到mysql,防止丢失
奇怪的坑:
如果不加 rest.port=8083 参数,我这边会报奇怪的错误;该参数并非一定需要,视具体环境而定。