前言
大数据开发中,oozie和azkaban算是应用比较广泛的调度框架了。
oozie功能很强大,但是实际开发中我们应用的其实只有shell这一种action类型。
痛点在于编写workflow文件的繁复,闲来开发了一个自动生成action节点的脚本,可以减少部分的开发工作,如果有bug还望见谅 ~
一、需加入调度的脚本路径
$ pwd
/home/dmp_operator1/OozieApps/oozie_union
# 本地目录树
$ tree test_ads/
test_ads/
├── 1
│ ├── 1_1.sh
│ ├── 1_2.sh
│ └── 1_3.sh
├── 2
│ ├── 2_1.sh
│ ├── 2_2.sh
│ └── 2_3.sh
└── 3
├── 3_1.sh
├── 3_2.sh
└── 3_3.sh
# 使用readlink 提取出脚本文件的绝对路径
$ ls -a test_ads/*/*.sh | xargs readlink -f
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_1.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_3.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_1.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_3.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_1.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_3.sh
# OozieApps为所有oozie项目的主目录
# oozie_union为此次测试的项目目录
二、使用步骤
1. 编写脚本gen_action.sh
#!/bin/bash
# With no arguments, print the two accepted argument formats and stop.
if (( $# == 0 )); then
  printf '%s\n' \
    '传参格式:' \
    'fork - 并行类型 : fork_name join_name sh1 sh2 sh3 ...' \
    '非fork - 串行类型 : sh1 sh2 sh3 ...'
  exit 0
fi
# Decide whether this is a fork run and where the script arguments begin.
# When neither of the first two arguments ends in ".sh", they are taken as
# the fork name and the join name, and the scripts start at position 3.
sh_start=1
if ! [[ $1 == *.sh || $2 == *.sh ]]; then
  fork_name=$1
  join_name=$2
  sh_start=3
fi
#######################################
# Emit the XML for a single Oozie shell <action> node on stdout.
# Globals:
#   sh_start  (read) - the value 3 means fork mode; the action then
#                      transitions to the join node instead of to $2
#   join_name (read) - name of the join node, used only in fork mode
# Arguments:
#   $1 - path of the script this action runs
#   $2 - path of the following script, or the name of the next node
# Outputs:
#   The <action> element, preceded and followed by one blank line.
#######################################
gen_workflow() {
  local shell_path action_name action_next_name
  # Keep the resolved path from its 6th "/"-separated field onward (field 1
  # is the empty string before the leading "/"). For a layout such as
  # /home/<user>/OozieApps/<project>/... this yields the path relative to
  # the project directory; a different directory depth needs another field.
  shell_path=$(readlink -f -- "$1" | cut -d/ -f 6-)
  # Node names are the file name up to its first dot.
  action_name=$(basename -- "$1" | cut -d. -f 1)
  if [[ $sh_start -eq 3 ]]; then
    action_next_name=$join_name
  else
    action_next_name=$(basename -- "$2" | cut -d. -f 1)
  fi
  # Unquoted here-document: shell variables expand, "\$" yields a literal
  # "$" for the Oozie EL expressions, and double quotes are emitted as-is
  # so every XML attribute value (including xmlns) stays quoted.
  cat <<EOF

<action name="${action_name}">
<shell xmlns="uri:oozie:shell-action:0.2">
<job-tracker>\${jobTracker}</job-tracker>
<name-node>\${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>\${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>${shell_path}</exec>
<argument>\${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>${shell_path}</file>
<capture-output/>
</shell>
<ok to="${action_next_name}"/>
<error to="fail"/>
</action>

EOF
}
#######################################
# Emit the <fork> element listing one <path> per script argument.
# Globals:
#   fork_name (read) - value of the fork's name attribute
#   sh_start  (read) - index of the first script among the arguments; the
#                      arguments before it (fork and join names) are skipped
# Arguments:
#   The script's full argument list ("$@").
# Outputs:
#   The <fork> element on stdout; each path name is the script's file name
#   up to its first dot.
#######################################
gen_fork() {
  local i script
  echo "<fork name=\"${fork_name}\">"
  for ((i = sh_start; i <= $#; i++)); do
    # Indirect expansion fetches positional parameter number $i; it also
    # covers parameters beyond 9, which otherwise need braces (${10}).
    script=${!i}
    echo " <path start=\"$(basename -- "$script" | cut -d. -f 1)\"/>"
  done
  echo '</fork>'
}
#######################################
# Emit the <join> element that closes a fork.
# Globals:
#   join_name (read) - value of the join's name attribute
# Arguments:
#   $1 - optional name of the node the join transitions to; defaults to
#        the placeholder "xxx_end"
# Outputs:
#   One <join .../> line on stdout.
#######################################
gen_join() {
  local next_node=${1:-xxx_end}
  # printf with %s keeps the name exactly as given (no word splitting or
  # glob expansion of its value).
  printf '<join name="%s" to="%s"/>\n' "$join_name" "$next_node"
}
# In fork mode the <fork> element is emitted before the actions.
if [[ $sh_start -eq 3 ]]; then
  gen_fork "$@"
fi
# Generate one action per script argument.
for ((i = sh_start; i <= $#; i++)); do
  # Indirect expansion reads positional parameter number $i.
  action1=${!i}
  # The last script has no following script, so its successor is "end".
  if ((i == $#)); then
    action2=end
  else
    next_idx=$((i + 1))
    action2=${!next_idx}
  fi
  # Debug trace of the (current, next) pair; it goes to stdout together
  # with the generated XML.
  echo "$action1 $action2"
  gen_workflow "$action1" "$action2"
done
# In fork mode the <join> element closes the output.
if [[ $sh_start -eq 3 ]]; then
  gen_join
fi
2. 测试
# 先注释掉倒数第6行的 gen_workflow $action1 $action2,暂时不生成action节点,查看运行逻辑
# 1. 参数格式
$ sh gen_action.sh
传参格式:
fork - 并行类型 : fork_name join_name sh1 sh2 sh3 ...
非fork - 串行类型 : sh1 sh2 sh3 ...
# 2. 测试fork类型
$ sh gen_action.sh my_fork my_join `ls -a test_ads/*/*.sh | xargs readlink -f`
<fork name="my_fork">
<path start=1_1/>
<path start=1_2/>
<path start=1_3/>
<path start=2_1/>
<path start=2_2/>
<path start=2_3/>
<path start=3_1/>
<path start=3_2/>
<path start=3_3/>
</fork>
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_1.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_2.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_3.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_3.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_1.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_1.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_2.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_3.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_3.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_1.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_1.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_2.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_3.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_3.sh end
<join name="my_join" to="xxx_end"/>
# 3. 测试串行类型
$ sh gen_action.sh `ls -a test_ads/*/*.sh | xargs readlink -f`
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_1.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_2.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_3.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/1/1_3.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_1.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_1.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_2.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_3.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/2/2_3.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_1.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_1.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_2.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_2.sh /home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_3.sh
/home/dmp_operator1/OozieApps/oozie_union/test_ads/3/3_3.sh end
3. 实操
# 取消掉 gen_workflow $action1 $action2 的注释,并注释掉调试打印信息 echo $action1 $action2
# fork并行版测试结果 (为避免篇幅过长,串行版可自行测试)
$ sh gen_action.sh my_fork my_join `ls -a test_ads/*/*.sh | xargs readlink -f`
<fork name="my_fork">
<path start=1_1/>
<path start=1_2/>
<path start=1_3/>
<path start=2_1/>
<path start=2_2/>
<path start=2_3/>
<path start=3_1/>
<path start=3_2/>
<path start=3_3/>
</fork>
<action name="1_1">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/1/1_1.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/1/1_1.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<action name="1_2">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/1/1_2.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/1/1_2.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<action name="1_3">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/1/1_3.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/1/1_3.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<action name="2_1">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/2/2_1.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/2/2_1.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<action name="2_2">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/2/2_2.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/2/2_2.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<action name="2_3">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/2/2_3.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/2/2_3.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<action name="3_1">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/3/3_1.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/3/3_1.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<action name="3_2">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/3/3_2.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/3/3_2.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<action name="3_3">
<shell xmlns=uri:oozie:shell-action:0.2>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.memory.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4096m</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.resource.mb</name>
<value>5120</value>
</property>
<property>
<name>oozie.launcher.yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>
<exec>test_ads/3/3_3.sh</exec>
<argument>${(wf:actionData('shell_date')['etl_date'])}</argument>
<file>test_ads/3/3_3.sh</file>
<capture-output/>
</shell>
<ok to="my_join"/>
<error to="fail"/>
</action>
<join name="my_join" to="xxx_end"/>
总结
需要注意的点大部分都在备注中体现,请仔细查看哦~
乍一看可能很复杂,但是逐行逐行慢慢看慢慢测试,相信大家一定可以理解并在实际的学习和工作中运用上的 ~