sqoop+hive+shell+oozie 示例

示例



create.q

-- Declares the external Hive table that exposes the imported SYS_ACCOUNT rows.
-- The table is EXTERNAL: Hive only records metadata, the files stay under the
-- LOCATION below. Rows are split into one partition per date_time value, and
-- fields inside each row are separated by the \001 character.
-- IF NOT EXISTS makes the statement a no-op when the table is already defined.
-- NOTE(review): account_id is declared DOUBLE and lock_time is declared DATE;
-- confirm these match the precision of the source columns (TODO confirm).
CREATE EXTERNAL TABLE IF NOT EXISTS default.SYS_ACCOUNT (
    account_id           DOUBLE,
    account_name         STRING,
    account_password     STRING,
    status               INT,
    last_login_device_id STRING,
    band_device_id       STRING,
    key                  STRING,
    last_login_ip        STRING,
    account_type         INT,
    lock_time            DATE,
    account_password_ver STRING
)
PARTITIONED BY (date_time STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
LOCATION '/data/hive/sys_account';

job.properties


# ---- Cluster endpoints (referenced by the workflows as ${nameNode} / ${jobTracker}) ----
nameNode=hdfs://fuze250:8020
jobTracker=fuze250:8032
# Disabled alternative endpoints (presumably logical names for an HA cluster - confirm before enabling).
#nameNode=hdfs://nameservice
#jobTracker=yarnRM

# ---- Queue and path fragments ----
# queueName is the value the workflow actions assign to mapred.job.queue.name.
queueName=default
# oozieRoot is a path fragment used in oozie.wf.application.path below.
oozieRoot=oozie
hiveRoot=hive

# ---- Oozie library and application settings ----
oozie.use.system.libpath=true
oozie.libpath=/user/oozie/share/lib
# HDFS directory that holds the workflow definition for this job.
oozie.wf.application.path=${nameNode}/user/${user.name}/${oozieRoot}/sqoop/sys_account_user_vip

procedure.q


-- Registers one load date as a partition of default.SYS_ACCOUNT.
-- The date_time variable must be supplied by the caller as a script parameter;
-- it is used both as the partition value and as the directory name.
-- IF NOT EXISTS keeps a re-run for the same date from failing on a partition
-- that was already registered.
-- The partition value is quoted because the partition column is a string.
ALTER TABLE default.SYS_ACCOUNT
    ADD IF NOT EXISTS PARTITION (date_time = '${date_time}')
    LOCATION '/data/hive/sys_account/date_time=${date_time}';

script.sh

#!/bin/bash
# Prints today's date as a single "date_time=YYYYMMDD" line on stdout.
# The key=value shape matters: the workflows in this document capture this
# output and read the value back under the key "date_time".
today="$(date +%Y%m%d)"
printf 'date_time=%s\n' "$today"

workflow.xml

自己写的

<?xml version="1.0" encoding="UTF-8"?>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
	license agreements. See the NOTICE file distributed with this work for additional 
	information regarding copyright ownership. The ASF licenses this file to 
	you under the Apache License, Version 2.0 (the "License"); you may not use 
	this file except in compliance with the License. You may obtain a copy of 
	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
	by applicable law or agreed to in writing, software distributed under the 
	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
	OF ANY KIND, either express or implied. See the License for the specific 
	language governing permissions and limitations under the License. -->
<!-- Loads SYS_ACCOUNT into Hive as a linear chain of four actions:
	   1. sys_account_create    : runs create.q through HiveServer2
	   2. shell-date            : runs script.sh and captures its key=value output
	   3. sys_account_import    : Sqoop import into the date_time=<value> directory
	   4. sys_account_procedure : runs procedure.q with the captured date_time
	 Every action routes errors to the "fail" kill node.
	 NOTE(review): both hive2 actions use cred="hive2", but this document declares
	 no credentials section; confirm the credential is provided or not needed. -->
<workflow-app name="sys_account" xmlns="uri:oozie:workflow:0.5">
	<start to="sys_account_create" />

	<!-- Step 1: make sure the external table exists. -->
	<action name="sys_account_create" cred="hive2">
		<hive2 xmlns="uri:oozie:hive2-action:0.1">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<jdbc-url>jdbc:hive2://fuze248:10000/default</jdbc-url>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<script>create.q</script>
		</hive2>
		<ok to="shell-date" />
		<error to="fail" />
	</action>

	<!-- Step 2: run script.sh; capture-output exposes its stdout to later
	     actions through wf:actionData('shell-date'). -->
	<action name="shell-date">
		<shell xmlns="uri:oozie:shell-action:0.2">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<exec>script.sh</exec>
			<file>script.sh</file>
			<capture-output />
		</shell>
		<ok to="sys_account_import" />
		<error to="fail" />
	</action>

	<!-- Step 3: import the source table with a single mapper into the directory
	     for the captured date; delete-target-dir clears an earlier run's output.
	     NOTE(review): the password is passed in clear text as an argument;
	     consider Sqoop's password-file option instead. -->
	<action name="sys_account_import">
		<sqoop xmlns="uri:oozie:sqoop-action:0.2">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<arg>import</arg>
			<arg>--connect</arg>
			<arg>jdbc:oracle:thin:@xxx:1521:xxx</arg>
			<arg>--username</arg>
			<arg>FUZEPASSPORT</arg>
			<arg>--password</arg>
			<arg>FUZEPASSPORT_PWD</arg>
			<arg>--query</arg>
			<arg>select SA.* from SYS_ACCOUNT SA where $CONDITIONS</arg>
			<arg>--delete-target-dir</arg>
			<arg>--target-dir</arg>
			<arg>/data/hive/sys_account/date_time=${(wf:actionData('shell-date')['date_time'])}</arg>
			<arg>--fields-terminated-by</arg>
			<arg>\001</arg>
			<arg>-m</arg>
			<arg>1</arg>
		</sqoop>
		<ok to="sys_account_procedure" />
		<error to="fail" />
	</action>

	<!-- Step 4: run procedure.q, handing it the captured date as date_time. -->
	<action name="sys_account_procedure" cred="hive2">
		<hive2 xmlns="uri:oozie:hive2-action:0.1">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<jdbc-url>jdbc:hive2://fuze248:10000/default</jdbc-url>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<script>procedure.q</script>
			<param>date_time=${(wf:actionData('shell-date')['date_time'])}</param>
		</hive2>
		<ok to="end" />
		<error to="fail" />
	</action>

	<!-- Shared failure exit; the message reports the error of whichever node failed last. -->
	<kill name="fail">
		<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
	</kill>
	<end name="end" />
</workflow-app>


workflowhue.xml

hue配置的

<!-- Hue-generated variant of the SYS_ACCOUNT load. The chain is:
     hive2-13ef (create.q), shell-a94d (script.sh, output captured),
     sqoop-a084 (import), hive2-46d1 (procedure.q).
     Scripts are referenced by absolute paths under
     /user/root/oozie/sqoop/sys_account_user_vip. Every error goes to "Kill". -->
<workflow-app name="sys_account" xmlns="uri:oozie:workflow:0.5">
    <start to="hive2-13ef"/>
    <kill name="Kill">
        <message>操作失败,错误消息[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <!-- Runs create.q through HiveServer2. -->
    <action name="hive2-13ef" cred="hive2">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <jdbc-url>jdbc:hive2://fuze248:10000/default</jdbc-url>
            <script>/user/root/oozie/sqoop/sys_account_user_vip/create.q</script>
        </hive2>
        <ok to="shell-a94d"/>
        <error to="Kill"/>
    </action>
    <!-- Runs script.sh; capture-output makes its key=value output available
         to later actions through wf:actionData('shell-a94d'). -->
    <action name="shell-a94d">
        <shell xmlns="uri:oozie:shell-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <exec>/user/root/oozie/sqoop/sys_account_user_vip/script.sh</exec>
            <file>/user/root/oozie/sqoop/sys_account_user_vip/script.sh#script.sh</file>
            <capture-output/>
        </shell>
        <ok to="sqoop-a084"/>
        <error to="Kill"/>
    </action>
    <!-- Imports the source table with one mapper into the directory for the
         captured date. The Oracle thin URL needs the "@" before the host.
         NOTE(review): the password is passed in clear text as an argument;
         consider Sqoop's password-file option instead. -->
    <action name="sqoop-a084">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>import</arg>
            <arg>--connect</arg>
            <arg>jdbc:oracle:thin:@xxx:1521:xxx</arg>
            <arg>--username</arg>
            <arg>FUZEPASSPORT</arg>
            <arg>--password</arg>
            <arg>FUZEPASSPORT_PWD</arg>
            <arg>--query</arg>
            <arg>select SA.* from SYS_ACCOUNT SA where $CONDITIONS</arg>
            <arg>--delete-target-dir</arg>
            <arg>--target-dir</arg>
            <arg>/data/hive/sys_account/date_time=${(wf:actionData('shell-a94d')['date_time'])}</arg>
            <arg>--fields-terminated-by</arg>
            <arg>\001</arg>
            <arg>-m</arg>
            <arg>1</arg>
        </sqoop>
        <ok to="hive2-46d1"/>
        <error to="Kill"/>
    </action>
    <!-- Runs procedure.q, handing it the captured date as date_time. -->
    <action name="hive2-46d1" cred="hive2">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <jdbc-url>jdbc:hive2://fuze248:10000/default</jdbc-url>
            <script>/user/root/oozie/sqoop/sys_account_user_vip/procedure.q</script>
            <param>date_time=${(wf:actionData('shell-a94d')['date_time'])}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>


shell 执行其他命令

<!-- Single-action workflow: ships /tmp/shell/test.sh into the action's working
     directory under the name test.sh and runs it as "sh test.sh".
     NOTE(review): only test.sh is listed as a file; anything else the script
     needs locally must be made available some other way (TODO confirm). -->
<workflow-app name="shell" xmlns="uri:oozie:workflow:0.5">
    <start to="shell-3ac6"/>

    <action name="shell-3ac6">
        <shell xmlns="uri:oozie:shell-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <exec>sh</exec>
            <argument>test.sh</argument>
            <file>/tmp/shell/test.sh#test.sh</file>
            <capture-output/>
        </shell>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <!-- Failure exit: reports the error message of the last failed node. -->
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <end name="End"/>
</workflow-app>

test.sh 文件

#!/bin/bash
# Empties the MySQL table tb_test through "sqoop eval", then submits the Spark
# job com.userportrait.OozieTest to YARN.
#
# set -e stops the script at the first failing command, so the Spark job is not
# submitted when the TRUNCATE fails, and the failure shows in the exit status.
#
# NOTE(review): the database password is passed on the command line; consider
# a password file instead.
# NOTE(review): sparktest-1.0-SNAPSHOT.jar is resolved relative to the working
# directory; confirm the jar is present there when the script runs.
set -e

sqoop eval \
    --connect jdbc:mysql://xxx:3306/test \
    --username root \
    --password xxx \
    -e "TRUNCATE TABLE tb_test"

spark-submit \
    --class com.userportrait.OozieTest \
    --master yarn-cluster \
    --num-executors 1 \
    --executor-cores 2 \
    --executor-memory 512M \
    --driver-memory 512M \
    sparktest-1.0-SNAPSHOT.jar


评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值