Hadoop学习工作总结(二)之Hive流程

coordinator.xml 

<coordinator-app xmlns="uri:oozie:coordinator:0.2"
                 name="gbd-360-jkdm-oracle-cd"
                 frequency="${coord:months(1)}"
                 start="${job_start}"
                 end="${job_end}"
                 timezone="GMT+08:00">
    <!--
        Coordinator that materializes one workflow run per month between
        ${job_start} and ${job_end}. The job_start, job_end, datesets_initial,
        hdfs_sqoop_tmp and application_path variables are supplied by the
        job properties at submission time.
    -->

    <controls>
        <!-- Oozie documents the timeout value in minutes. -->
        <timeout>2</timeout>
        <!-- At most one materialized action runs at a time. -->
        <concurrency>1</concurrency>
    </controls>

    <datasets>
        <!-- One dataset instance per month, addressed as <tmp dir>/<yyyyMMdd>. -->
        <dataset name="bankLogDataset"
                 frequency="${coord:months(1)}"
                 initial-instance="${datesets_initial}"
                 timezone="GMT+08:00">
            <uri-template>${hdfs_sqoop_tmp}/${YEAR}${MONTH}${DAY}</uri-template>
        </dataset>
    </datasets>

    <output-events>
        <!-- The output directory is the dataset instance one period before the current one. -->
        <data-out name="logData" dataset="bankLogDataset">
            <instance>${coord:current(-1)}</instance>
        </data-out>
    </output-events>

    <action>
        <workflow>
            <app-path>${application_path}</app-path>
            <configuration>
                <!-- Resolved URI of the logData output event, handed to the workflow. -->
                <property>
                    <name>sqoop_import_path</name>
                    <value>${coord:dataOut('logData')}</value>
                </property>
                <!-- Nominal time shifted back one month, formatted as yyyyMMdd. -->
                <property>
                    <name>nominalformateDate</name>
                    <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), -1, 'MONTH'), "yyyyMMdd")}</value>
                </property>
            </configuration>
        </workflow>
    </action>
</coordinator-app>

 

gbd-360-jkdm-oracle.properties

# ---------------------------------------------------------------------------
# Job properties for the gbd-360-jkdm-oracle Oozie coordinator.
# ${namenode_address} is not defined in this file; runJob.sh passes it on the
# oozie command line with -Dnamenode_address=...
# ---------------------------------------------------------------------------

# --- HDFS locations ---------------------------------------------------------
# Base directory used in the coordinator dataset uri-template.
hdfs_sqoop_tmp=${namenode_address}/apps-data/hduser0522/gbd_360_safe/gbd-360-jkdm-oracle
# Root under which the application is deployed.
hdfs_address_prefix=${namenode_address}/apps/hduser0522
# Directory holding coordinator.xml / workflow.xml for this job.
application_path=${hdfs_address_prefix}/gbd_360_safe/gbd-360-jkdm-oracle

# --- Oozie submission settings ----------------------------------------------
# Make the Oozie system share-lib available to the actions.
oozie.use.system.libpath=true
# Submit as a coordinator application (bundle submission kept disabled below).
oozie.coord.application.path=${application_path}
#oozie.bundle.application.path=${application_path}

# --- Coordinator schedule ---------------------------------------------------
# Start / end of the coordinator and the initial instance of its dataset.
job_start=2014-08-24T02:00+0800
job_end=2099-12-30T02:00+0800
datesets_initial=2000-01-01T02:00+0800

# --- Shell scripts referenced by the workflow actions -----------------------
seq1=seq1.sh
seq2=seq2.sh
seq3=seq3.sh
seq4=seq4.sh
seq5=seq5.sh

 

removeJob.sh

 

#!/bin/sh
#
# Kill an Oozie job.
#
# Usage: removeJob.sh <oozie-job-id>
#
# Credentials, the Oozie URL and the proxy user are looked up in server.env.
# Each lookup prints the text after the last ":::::" on every line that
# matches the key, so lines are expected to look like "<key>:::::<value>".

ENV_FILE=/appcom/apps/hduser0522/gbd_360_safe/server.env

# Print the value stored for key $1 in ENV_FILE.
read_env() {
    grep "$1" "$ENV_FILE" | awk 'BEGIN{FS=":::::"}{print $NF}'
}

# Without a job id the oozie command line would be malformed ("-kill -doas ...").
if [ -z "${1:-}" ]; then
    echo "Usage: $0 <oozie-job-id>" >&2
    exit 1
fi

if [ ! -r "$ENV_FILE" ]; then
    echo "Cannot read $ENV_FILE" >&2
    exit 1
fi

j_username=$(read_env j_username)
j_password=$(read_env j_password)
oozie_url=$(read_env oozie_url)
doas_user=$(read_env doas_user)

# Every expansion is quoted so values containing spaces or glob characters
# reach oozie as a single, unmodified argument.
oozie "-Dheader:j_username=$j_username" "-Dheader:j_password=$j_password" \
    job --oozie "$oozie_url" -auth ldap -kill "$1" -doas "$doas_user"

 

runJob.sh

#!/bin/sh
#
# Redeploy the gbd-360-jkdm-oracle application to HDFS and submit it to Oozie.
#
# Steps:
#   1. read connection settings from server.env
#   2. remove the previous data and application directories from HDFS
#   3. upload the local application directory
#   4. submit the job with the properties file, passing the settings as -D options
#
# Each server.env lookup prints the text after the last ":::::" on every line
# that matches the key, so lines are expected to look like "<key>:::::<value>".

ENV_FILE=/appcom/apps/hduser0522/gbd_360_safe/server.env
LOCAL_APP_DIR=/appcom/apps/hduser0522/gbd_360_safe/gbd-360-jkdm-oracle
HDFS_APP_DIR=/apps/hduser0522/gbd_360_safe/gbd-360-jkdm-oracle
HDFS_DATA_DIR=/apps-data/hduser0522/gbd_360_safe/gbd-360-jkdm-oracle

# Print the value stored for key $1 in ENV_FILE.
read_env() {
    grep "$1" "$ENV_FILE" | awk 'BEGIN{FS=":::::"}{print $NF}'
}

if [ ! -r "$ENV_FILE" ]; then
    echo "Cannot read $ENV_FILE" >&2
    exit 1
fi

# Check the upload source before anything is deleted from HDFS.
if [ ! -d "$LOCAL_APP_DIR" ]; then
    echo "Local application directory $LOCAL_APP_DIR not found" >&2
    exit 1
fi

j_username=$(read_env j_username)
j_password=$(read_env j_password)
oozie_url=$(read_env oozie_url)
doas_user=$(read_env doas_user)
namenode_address=$(read_env namenode_address)
jobtracker_address=$(read_env jobtracker_address)
mapred_job_queue_name=$(read_env mapred_job_queue_name)
jk_connection=$(read_env jk_connection)
jk_username=$(read_env jk_username)
jk_password=$(read_env jk_password)

# Remove the previous deployment. A failure here is not fatal, so the script
# keeps going (for instance, the paths may not exist yet).
# NOTE(review): "-rmr" is deprecated in newer Hadoop releases in favour of
# "-rm -r"; it is kept for compatibility with the cluster this was written for.
hadoop fs -rmr "$HDFS_DATA_DIR"
hadoop fs -rmr "$HDFS_APP_DIR"

# Do not submit the job if the application could not be uploaded.
if ! hadoop fs -put "$LOCAL_APP_DIR/" "$HDFS_APP_DIR/"; then
    echo "Upload of $LOCAL_APP_DIR to $HDFS_APP_DIR failed; job not submitted" >&2
    exit 1
fi

# Every expansion is quoted so values containing spaces or glob characters
# reach oozie as a single, unmodified argument.
oozie "-Dheader:j_username=$j_username" "-Dheader:j_password=$j_password" job \
    "-Dnamenode_address=$namenode_address" \
    "-Djobtracker_address=$jobtracker_address" \
    "-Dmapred_job_queue_name=$mapred_job_queue_name" \
    "-Djk_connection=$jk_connection" \
    "-Djk_username=$jk_username" \
    "-Djk_password=$jk_password" \
    --oozie "$oozie_url" \
    -config "$LOCAL_APP_DIR/gbd-360-jkdm-oracle.properties" \
    -auth ldap -run -doas "$doas_user"

 

workflow.xml

 

<workflow-app xmlns="uri:oozie:workflow:0.4" name="gbd-360-jkdm-oracle-wf">
    <!--
        Flow: pre_clean_path_node, then seq1, then a fork into seq2..seq5,
        then the join, then clean_path_node, then end.
        Any action error transitions to the "fail" kill node.
    -->
    <start to="pre_clean_path_node"/>

    <!-- Delete the import directory before the run starts. -->
    <action name="pre_clean_path_node">
        <fs>
            <delete path="${sqoop_import_path}"/>
        </fs>
        <ok to="seq1"/>
        <error to="fail"/>
    </action>

    <!-- Run the ${seq1} script, shipped from the application's shell/ directory. -->
    <action name="seq1">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobtracker_address}</job-tracker>
            <name-node>${namenode_address}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${mapred_job_queue_name}</value>
                </property>
            </configuration>
            <exec>${seq1}</exec>
            <!-- Positional arguments: date, queue name, connection, user name, password. -->
            <argument>${nominalformateDate}</argument>
            <argument>${mapred_job_queue_name}</argument>
            <argument>${jk_connection}</argument>
            <argument>${jk_username}</argument>
            <argument>${jk_password}</argument>
            <file>shell/${seq1}#${seq1}</file>
        </shell>
        <ok to="process_import_forking2"/>
        <error to="fail"/>
    </action>

    <!-- Start seq2, seq3, seq4 and seq5 in parallel. -->
    <fork name="process_import_forking2">
        <path start="seq2"/>
        <path start="seq3"/>
        <path start="seq4"/>
        <path start="seq5"/>
    </fork>

    <!-- Same layout as seq1, but runs ${seq2} and continues to the join. -->
    <action name="seq2">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobtracker_address}</job-tracker>
            <name-node>${namenode_address}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${mapred_job_queue_name}</value>
                </property>
            </configuration>
            <exec>${seq2}</exec>
            <argument>${nominalformateDate}</argument>
            <argument>${mapred_job_queue_name}</argument>
            <argument>${jk_connection}</argument>
            <argument>${jk_username}</argument>
            <argument>${jk_password}</argument>
            <file>shell/${seq2}#${seq2}</file>
        </shell>
        <ok to="process_import_joining"/>
        <error to="fail"/>
    </action>

    <!-- NOTE(review): the line below is the author's elision marker; the seq3, seq4 and
         seq5 actions referenced by the fork are not shown and presumably mirror seq2. -->
.......

    <!-- Wait for all forked paths, then clean up. -->
    <join name="process_import_joining" to="clean_path_node"/>

    <!-- Delete the import directory again once every script has finished. -->
    <action name="clean_path_node">
        <fs>
            <delete path="${sqoop_import_path}"/>
        </fs>
        <ok to="end"/>
        <error to="fail"/>
    </action>

    <kill name="fail">
        <message>gbd import wf failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值