在没有调度器的情况下，怎么用shell模拟oozie调度器，满足数仓任务之间的前后依赖

不想起的昵称

于 2021-11-25 14:08:42 发布

阅读量1.1k

点赞数 2

分类专栏： hive 文章标签：大数据数据仓库 hive

本文链接：https://blog.csdn.net/weixin_40267121/article/details/121519110

版权

hive 专栏收录该内容

42 篇文章 9 订阅

订阅专栏

在没有调度器的情况下，数仓任务之间怎么进行前后依赖呢？
先看看现有调度器是怎么进行前后任务依赖的？
oozie：根据实例文件判断——任务成功后会生成以 _SUCCESS 结尾的实例（标志）文件
azkaban：根据任务状态判断
xxl-job：根据任务状态判断
…
dolphinscheduler：根据任务状态判断

根据oozie调度器的原理，我们用shell进行模仿一下：

对于数仓任务，我们通过检测脚本的最后一张表是否有数据来判断任务是否成功
数仓任务成功之后，创建一个实例文件，给下游数仓任务创造可执行的条件
下游数仓任务检测不到上游数仓任务成功后的实例文件，则等待直到设置超时时间

1. 循环检测上游数仓任务的实例文件：检测到则 break；检测不到则每隔 1 分钟重试，直到达到设置的超时时间（示例中为 666 分钟）。超时后创建一个控制文件，用来阻止后续数仓任务执行。

# Paths used by this job. ${shellname} and ${varcurdate} are not defined in
# this snippet and must be set before it runs.
shelldir="/data/dw_center/${shellname}"
shelllog="${shelldir}/shelllog/${shellname}.log"
# Control file: the execution step only runs the job when this file is absent.
shellcontroll="${shelldir}/shellcontroll/${shellname}_${varcurdate}"
depend_table="ods_app_main_repairimei"
# Marker file the upstream job is expected to create for the same date.
depend_file="/data/ods_center/ods_app_main_repairimei/mark/${depend_table}_${varcurdate}"

# Maximum number of one-minute polls before giving up. Defaults to 666;
# set max_wait_minutes beforehand to override.
max_wait_minutes="${max_wait_minutes:-666}"
count=0

# Poll once a minute until the upstream marker appears or the limit is hit.
while true; do
  if [[ -f "${depend_file}" ]]; then
    echo "depend_file is exit:${depend_file}" >> "${shelllog}"
    break
  fi
  if (( count >= max_wait_minutes )); then
    # Timed out: create the control file so the job is not executed.
    touch "${shellcontroll}"
    echo "####################上层依赖文件没生成，超时${max_wait_minutes}分钟，请查看日志！########################" >> "${shelllog}"
    break
  fi
  echo "####################上层依赖文件没生成，超时${count}分钟，请查看日志！########################" >> "${shelllog}"
  count=$(( count + 1 ))
  sleep 1m
done

注：如果依赖上游多张表，可以添加多个depend_file
2. 上游数仓任务实例存在时，执行数仓任务，并把执行日志写入日志文件；如果上游实例一直不存在、检测超时（此时控制文件已被创建），则不执行数仓任务

# Run the job only when no control file exists for this date.
if [[ ! -f "${shellcontroll}" ]]; then
  # The control file is created before the job starts.
  # NOTE(review): presumably this keeps a second run for the same date from
  # executing again, even if this run fails — confirm that is intended.
  touch "${shellcontroll}"
  echo "create controllfile" >> "${shelllog}"
  # ${spark} is left unquoted on purpose: it is defined outside this snippet
  # and may hold a command plus leading options.
  ${spark} --name "${shellname}_${varcurdate}" \
    --master yarn \
    --queue dw_offline \
    --conf spark.port.maxRetries=100 \
    --driver-memory 5G \
    --num-executors 15 \
    --executor-memory 15G \
    --executor-cores 20 \
    -v -f "${shelldir}/${hivesqlexe}" >> "${shelllog}" 2>&1
fi

3.把日志文件中系统错误日志过滤掉，其他执行错误信息输出到另一个报警文件中，作为数仓任务执行失败输出的报错信息

# Build the alert file from the run log: keep lines mentioning a failure or
# error (case-insensitive), then drop the listed system-level messages that
# should not count as job errors.
# NOTE(review): monitor_text_is_exist reads
# ${shellmonitor}/${shellname}_monitor.txt while this writes
# ${shelldir}/${tablename}_monitor.txt — confirm both name the same file.
grep -i -e 'fail' -e 'error' -e 'errno' -- "${shelllog}" \
  | grep -v \
      -e 'Failed to get database default, returning NoSuchObjectException' \
      -e 'ERROR LiveListenerBus' \
      -e 'ERROR TransportResponseHandler' \
      -e 'ERROR KeyProviderCache' \
      -e 'ERROR YarnScheduler' \
      -e 'ERROR YarnClientSchedulerBackend' \
      > "${shelldir}/${tablename}_monitor.txt"

4. 检测数仓脚本最后一张表是否有数据；如果没有数据，则进行钉钉报警，并输出报错信息

#######################################
# Escape a string so it can be embedded inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
#######################################
_dingding_json_escape() {
  local s=$1
  s=${s//\\/\\\\}     # backslash first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  s=${s//[[:cntrl:]]/ } # remaining control characters are invalid in JSON
  printf '%s' "$s"
}

#######################################
# Send an alert text message to DingTalk.
# curl runs on a remote host reached over ssh. The JSON payload is streamed
# to curl's stdin (-d @-) instead of being pasted into the remote command
# line, so quotes, backslashes or newlines in the log text can neither break
# the JSON nor be interpreted by the remote shell.
# Globals:
#   shellname     (read) job name shown in the title
#   shelllogtime  (read) log location shown in the message
#   msg           (read) error details appended after the header line
# Arguments: none
# Returns:   exit status of ssh
#######################################
send_message_to_dingding() {
  # Placeholders: webhook URL, mobile number to @-mention, ssh port and host.
  local api=xxxxxx
  local phone=xxxxxx
  local port=xxxxxx
  local ip=xxxxxx
  local title=" 【数仓任务${shellname}报错】: "
  local content=" 【详细请查看日志 ${shelllogtime}】 "
  local header body payload

  header=$(_dingding_json_escape "${title}${content}")
  body=$(_dingding_json_escape "${msg}")

  # "\\n" in the format yields a literal \n, i.e. a JSON newline escape
  # between the header and the error details.
  printf -v payload \
    '{"msgtype": "text", "text": {"content": "%s\\n%s"}, "at": {"atMobiles": ["%s"], "isAtAll": false}}' \
    "${header}" "${body}" "${phone}"

  ssh -p "${port}" "${ip}" \
    "curl '${api}' -H 'Content-Type: application/json' -d @-" <<<"${payload}"
}

#######################################
# Choose the alert text and send it: use the contents of the monitor file
# when it exists and is non-empty, otherwise the literal text ERROR.
# Globals:
#   shellmonitor, shellname  (read) locate the monitor file
#   msg                      (written) text read by send_message_to_dingding
# Arguments: none
# Returns:   exit status of send_message_to_dingding
#######################################
monitor_text_is_exist() {
  local report="${shellmonitor}/${shellname}_monitor.txt"

  if [[ -s "${report}" ]]; then
    msg=$(<"${report}")
  else
    msg='ERROR'
  fi
  send_message_to_dingding
}

#######################################
# Check the job's output partition and publish the result.
# If the partition directory for the current date is missing, or its file
# count is not a number >= 1, log a failure banner and raise an alert.
# Otherwise create the marker file that downstream jobs wait for.
# Globals:
#   hdfs, dbname, tablename, varcurdate  (read) locate the partition
#   shelllog                             (read) log file appended to
#   mark_file                            (read) marker file to create
# Arguments: none
#######################################
monitor() {
  local partition="/user/hive/warehouse/${dbname}.db/${tablename}/dt=${varcurdate}"
  local fail_banner="####################################This task is failed!####################################"
  local count

  # ${hdfs} is left unquoted on purpose: it is defined outside this snippet
  # and may hold a command plus leading options.
  if ! ${hdfs} dfs -test -d "${partition}"; then
    echo "${fail_banner}" >> "${shelllog}"
    monitor_text_is_exist
    return
  fi

  # The second column of the -count output is used as the file count.
  count=$(${hdfs} dfs -count "${partition}" | awk '{ print $2 }')
  echo "${dbname}.${tablename} dt=${varcurdate}的文件个数为：${count}" >> "${shelllog}"

  # A missing or non-numeric count (e.g. the hdfs call failed) is a failure;
  # it must never fall through to the success branch.
  if [[ ! "${count}" =~ ^[0-9]+$ ]] || (( 10#${count} < 1 )); then
    # Still printed to stdout as before, and now also recorded in the log.
    echo "${fail_banner}" | tee -a "${shelllog}"
    monitor_text_is_exist
    return
  fi

  # Data exists: create the marker file for downstream jobs.
  touch "${mark_file}"
  echo "创建标志文件：${mark_file}" >> "${shelllog}"
  echo "####################################This task is successful!####################################" >> "${shelllog}"
}

# Run the output check defined above.
monitor