[Hadoop Offline Fundamentals] Chaining Oozie Tasks

Requirement
Run a shell script → run a MapReduce job → run a Hive script
-
1. Prepare the working directory
cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works
mkdir -p sereval-actions
-
2. Prepare the files to be scheduled
cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works
cp hive2/script.q sereval-actions/
cp shell/hello.sh sereval-actions/
cp -ra map-reduce/lib sereval-actions/
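The contents of hello.sh and script.q are not shown here (hello.sh was written in the earlier shell-action section, and script.q ships with the Oozie examples). For reference, minimal versions consistent with the workflow below would look roughly like this; the test table name and column come from the stock Oozie example, and the my_output line feeds the shell action's <capture-output/>:

hello.sh:
#!/bin/bash
# Emit a key=value pair that the shell action's <capture-output/> can pick up
echo "my_output=Hello Oozie"

script.q:
-- Create an external table over ${INPUT} and dump it to ${OUTPUT};
-- INPUT and OUTPUT are supplied by the <param> elements of the hive2 action
DROP TABLE IF EXISTS test;
CREATE EXTERNAL TABLE test (a INT) STORED AS TEXTFILE LOCATION '${INPUT}';
INSERT OVERWRITE DIRECTORY '${OUTPUT}' SELECT * FROM test;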
-
3. Develop the scheduling configuration file
cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/sereval-actions
vim workflow.xml

<workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-wf">
    <start to="shell-node"/>
    <action name="shell-node">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <exec>${EXEC}</exec>
            <!-- <argument>my_output=Hello Oozie</argument> -->
            <file>/user/root/oozie_works/sereval-actions/${EXEC}#${EXEC}</file>
            <capture-output/>
        </shell>
        <ok to="mr-node"/>
        <error to="mr-node"/>
    </action>
    <action name="mr-node">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/${outputDir}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <!--
                <property>
                    <name>mapred.mapper.class</name>
                    <value>org.apache.oozie.example.SampleMapper</value>
                </property>
                <property>
                    <name>mapred.reducer.class</name>
                    <value>org.apache.oozie.example.SampleReducer</value>
                </property>
                <property>
                    <name>mapred.map.tasks</name>
                    <value>1</value>
                </property>
                <property>
                    <name>mapred.input.dir</name>
                    <value>/user/${wf:user()}/${examplesRoot}/input-data/text</value>
                </property>
                <property>
                    <name>mapred.output.dir</name>
                    <value>/user/${wf:user()}/${examplesRoot}/output-data/${outputDir}</value>
                </property>
                -->
                <!-- Enable the new MapReduce API -->
                <property>
                    <name>mapred.mapper.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>
                <!-- Output key class of the MR job -->
                <property>
                    <name>mapreduce.job.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <!-- Output value class of the MR job -->
                <property>
                    <name>mapreduce.job.output.value.class</name>
                    <value>org.apache.hadoop.io.IntWritable</value>
                </property>
                <!-- Input path -->
                <property>
                    <name>mapred.input.dir</name>
                    <value>${nameNode}/${inputdir}</value>
                </property>
                <!-- Output path -->
                <property>
                    <name>mapred.output.dir</name>
                    <value>${nameNode}/${outputDir}</value>
                </property>
                <!-- Mapper class to run -->
                <property>
                    <name>mapreduce.job.map.class</name>
                    <value>org.apache.hadoop.examples.WordCount$TokenizerMapper</value>
                </property>
                <!-- Reducer class to run -->
                <property>
                    <name>mapreduce.job.reduce.class</name>
                    <value>org.apache.hadoop.examples.WordCount$IntSumReducer</value>
                </property>
                <!-- Number of map tasks -->
                <property>
                    <name>mapred.map.tasks</name>
                    <value>1</value>
                </property>
            </configuration>
        </map-reduce>
        <ok to="hive2-node"/>
        <error to="fail"/>
    </action>
    <action name="hive2-node">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive2"/>
                <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <script>script.q</script>
            <param>INPUT=/user/${wf:user()}/${examplesRoot}/input-data/table</param>
            <param>OUTPUT=/user/${wf:user()}/${examplesRoot}/output-data/hive2</param>
        </hive2>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <decision name="check-output">
        <switch>
            <case to="end">
                ${wf:actionData('shell-node')['my_output'] eq 'Hello Oozie'}
            </case>
            <default to="fail-output"/>
        </switch>
    </decision>
    <kill name="fail">
        <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <kill name="fail-output">
        <message>Incorrect output, expected [Hello Oozie] but was [${wf:actionData('shell-node')['my_output']}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
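Before moving on, the workflow definition can be sanity-checked against the Oozie workflow schema with the CLI's validate command (in Oozie 4.1.0 this is a local check of the XML file; adjust the path if your file lives elsewhere):

cd /export/servers/oozie-4.1.0-cdh5.14.0/
bin/oozie validate oozie_works/sereval-actions/workflow.xml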
Develop the job.properties configuration file
cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/sereval-actions
vim job.properties

nameNode=hdfs://node01:8020
jobTracker=node01:8032
queueName=default
examplesRoot=oozie_works
EXEC=hello.sh
outputDir=/oozie/output
inputdir=/oozie/input
jdbcURL=jdbc:hive2://node03:10000/default
oozie.use.system.libpath=true
# HDFS path where the uploaded files live, i.e. /user/root/oozie_works/sereval-actions on HDFS
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/sereval-actions/workflow.xml
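Since the hive2 action connects to HiveServer2 through jdbcURL, it is worth confirming that node03:10000 is reachable before submitting. A quick smoke test with Beeline (the -n root login is an assumption and depends on your HiveServer2 authentication setup):

beeline -u "jdbc:hive2://node03:10000/default" -n root -e "show databases;"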
-
4. Upload the resource folder to the corresponding HDFS path
cd /export/servers/oozie-4.1.0-cdh5.14.0/oozie_works/
hdfs dfs -put sereval-actions/ /user/root/oozie_works/
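Note that mr-node reads its input from ${nameNode}/${inputdir}, i.e. /oozie/input, which none of the steps above create. A sketch that seeds some word-count input (wordcount.txt is a hypothetical local file) and double-checks that the upload landed:

hdfs dfs -mkdir -p /oozie/input
hdfs dfs -put wordcount.txt /oozie/input
hdfs dfs -ls /user/root/oozie_works/sereval-actions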
-
5. Run the scheduled job
cd /export/servers/oozie-4.1.0-cdh5.14.0/
bin/oozie job -oozie http://node03:11000/oozie -config oozie_works/sereval-actions/job.properties -run
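A successful submit prints a workflow job ID. The job can then be tracked in the Oozie web console at http://node03:11000/oozie, or from the CLI (replace <job-id> with the ID that -run printed):

bin/oozie job -oozie http://node03:11000/oozie -info <job-id>
bin/oozie job -oozie http://node03:11000/oozie -log <job-id>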