Our project is about to migrate its Hadoop MapReduce jobs to Spark. With MapReduce we could submit Java jobs directly to the cluster, but I haven't found an equivalent way to do this with Spark (if anyone knows how, please let me know). My workaround is to launch and monitor the application through SparkAppHandle.
import java.util.concurrent.CountDownLatch;

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class SparkYarnSubmit {
    public static void main(String[] args) throws Exception {
        CountDownLatch cdl = new CountDownLatch(1);
        SparkAppHandle handle = new SparkLauncher()
                .setSparkHome("/usr/local/spark-2.2.0")
                .setAppResource("/usr/local/spark-2.2.0/lib/spark.jar")
                .setMainClass("run.aaa.spark.SimpleApp")
                .setMaster("yarn")
                .setDeployMode("client")
                .setAppName("test yarn client")
                .setConf("spark.yarn.jars", "hdfs://master:9000/tmp/spark-jars/*")
                .setConf("spark.driver.allowMultipleContexts", "true")
                .setConf("spark.executor.cores", "2")
                .setConf("spark.executor.instances", "2")
                .addAppArgs("/README.md")
                .setVerbose(true)
                .startApplication(new SparkAppHandle.Listener() {
                    // Watch the application state here. Once the application has
                    // ended (for whatever reason), isFinal() returns true;
                    // until then it returns false.
                    @Override
                    public void stateChanged(SparkAppHandle sparkAppHandle) {
                        if (sparkAppHandle.getState().isFinal()) {
                            cdl.countDown();
                        }
                        System.out.println("state:" + sparkAppHandle.getState().toString());
                    }

                    @Override
                    public void infoChanged(SparkAppHandle sparkAppHandle) {
                        System.out.println("Info:" + sparkAppHandle.getState().toString());
                    }
                });
        System.out.println("The task is executing, please wait ....");
        // Block this thread until the application reaches a final state.
        cdl.await();
        System.out.println("The task is finished!");
    }
}