1.需要在oozie-site.xml设置如下属性:
<property> <name>hadoop.proxyuser.cenyuhai.hosts</name> <value>*</value> </property> <property> <name>hadoop.proxyuser.cenyuhai.groups</name> <value>*</value> </property>
2.如果Oozie报错 ClassNotFoundException: Class org.apache.oozie.action.hadoop.SparkMain,再添加如下属性:
<property> <name>oozie.use.system.libpath</name> <value>true</value> </property>
3.Java调用代码
maven的jar包
<dependency>
<groupId>org.apache.oozie</groupId>
<artifactId>oozie-client</artifactId>
<version>4.1.0-cdh5.9.0</version>
</dependency>
<dependency>
<groupId>org.apache.oozie</groupId>
<artifactId>oozie-core</artifactId>
<version>4.1.0-cdh5.9.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.oozie</groupId>
<artifactId>oozie-hadoop</artifactId>
<version>2.6.0-cdh5.9.2.oozie-4.1.0-cdh5.9.2</version>
<scope>provided</scope>
</dependency>
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.OozieClientException;
import org.apache.oozie.client.WorkflowJob;
import org.apache.oozie.client.WorkflowJob.Status;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
/**
* Created by root on 7/19/17.
*/
/**
 * Thin wrapper around the Oozie Java client API: submits a workflow job
 * (here, a Spark action defined in workflow.xml on HDFS) and queries its status.
 *
 * <p>Relies on the project-local {@code WorkflowParameter} class, a simple
 * name/value pair used to fill in the {@code ${...}} placeholders of the
 * workflow definition.
 */
public class WorkFlowClient {
    /** Base URL of the Oozie server REST endpoint. */
    private static final String OOZIE_URL = "http://master01:11000/oozie/";
    /** HDFS path of the workflow definition (workflow.xml). */
    private static final String JOB_PATH = "hdfs://master01:8020/bobo/in/flow/workflow.xml";
    /** YARN ResourceManager address, bound to the ${jobTracker} workflow property. */
    private static final String JOB_TRACKER = "master01:8032";
    /** HDFS NameNode URI, bound to the ${nameNode} workflow property. */
    private static final String NAME_NODE = "hdfs://master01:8020";

    private final OozieClient wc;

    /**
     * Creates a client bound to the given Oozie server URL.
     *
     * @param url base URL of the Oozie server, e.g. {@code http://host:11000/oozie/}
     */
    public WorkFlowClient(String url) {
        wc = new OozieClient(url);
    }

    /**
     * Submits and starts a workflow job.
     *
     * @param wfDefinition HDFS path of the workflow application (workflow.xml)
     * @param wfParameters optional name/value pairs substituted into the workflow;
     *                     may be {@code null} or empty
     * @return the Oozie job id of the started job
     * @throws OozieClientException if the Oozie server rejects the submission
     */
    public String startJob(String wfDefinition, List<WorkflowParameter> wfParameters)
            throws OozieClientException {
        // Create a workflow job configuration and set the workflow application path.
        Properties conf = wc.createConfiguration();
        conf.setProperty(OozieClient.APP_PATH, wfDefinition);

        // Mandatory cluster properties referenced by the workflow definition.
        conf.setProperty("jobTracker", JOB_TRACKER);
        conf.setProperty("nameNode", NAME_NODE);

        // Caller-supplied workflow parameters (iterating an empty list is a no-op).
        if (wfParameters != null) {
            for (WorkflowParameter parameter : wfParameters) {
                conf.setProperty(parameter.getName(), parameter.getValue());
            }
        }

        // Submit and start the workflow job.
        return wc.run(conf);
    }

    /**
     * Returns the current status of the given workflow job.
     *
     * @param jobID Oozie job id as returned by {@link #startJob}
     * @return the job's current {@link Status}
     * @throws OozieClientException if the server cannot be queried
     */
    public Status getJobStatus(String jobID) throws OozieClientException {
        WorkflowJob job = wc.getJobInfo(jobID);
        return job.getStatus();
    }

    public static void main(String[] args) throws OozieClientException, InterruptedException {
        // Create client.
        WorkFlowClient client = new WorkFlowClient(OOZIE_URL);

        // Build the workflow parameters consumed by workflow.xml's Spark action.
        List<WorkflowParameter> wfParameters = new LinkedList<WorkflowParameter>();
        wfParameters.add(new WorkflowParameter("jobmaster", "yarn-client"));
        wfParameters.add(new WorkflowParameter("jobmode", "client"));
        wfParameters.add(new WorkflowParameter("jobname", "SparkOozieAction"));
        wfParameters.add(new WorkflowParameter("jarclass", "com.kafkaspark.sparkstreaming.SparkstreamingData"));
        wfParameters.add(new WorkflowParameter("jarpath", "hdfs://master01:8020/bobo/in/flow/kafkaspark.jar"));
        wfParameters.add(new WorkflowParameter("sparkopts", "--num-executors 3 --executor-memory 1G --executor-cores 3 --driver-memory 2G --files config.properties --conf spark.executor.extraJavaOptions=-XX:+UseConcMarkSweepGC" +
                " --conf spark.yarn.jar=hdfs://master01:8020/bobo/in/flow/spark-assembly-1.6.0-cdh5.9.0-hadoop2.6.0-cdh5.9.0.jar"));
        wfParameters.add(new WorkflowParameter("jararg1", "slave01:9092,slave02:9092,slave03:9092"));
        wfParameters.add(new WorkflowParameter("jararg2", "DATA-TOPIC"));

        // Start Oozing.
        String jobId = client.startJob(JOB_PATH, wfParameters);

        // Oozie usually reports PREP immediately after submission; poll briefly
        // so a freshly-accepted job is not misreported as a startup failure.
        Status status = client.getJobStatus(jobId);
        for (int i = 0; i < 10 && status == Status.PREP; i++) {
            Thread.sleep(1000L);
            status = client.getJobStatus(jobId);
        }

        if (status == Status.RUNNING) {
            System.out.println("Workflow job running");
        } else {
            System.out.println("Problem starting Workflow job");
        }
    }
}
WorkflowParameter类是一个简单的参数类,只有name和value两个属性(含对应的getter)。
4.workflow.xml的配置
<workflow-app name="Spark_Workflow" xmlns="uri:oozie:workflow:0.1">
<start to="spark-SparkOozieAction"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="spark-SparkOozieAction">
<spark xmlns="uri:oozie:spark-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>${jobmaster}</master>
<mode>${jobmode}</mode>
<name>${jobname}</name>
<class>${jarclass}</class>
<jar>${jarpath}</jar>
<spark-opts>${sparkopts}</spark-opts>
<arg>${jararg1}</arg>
<arg>${jararg2}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>
5.你就可以去oozie的web界面查看提交的job