在网上查阅了很多资料,主要参考了以下两位大神的博客:
https://blog.csdn.net/gx304419380/article/details/79361645
https://blog.csdn.net/fansy1990/article/details/78551986
写了一个小demo。
首先spark jar要上传至hdfs中。
pom文件中只需要依赖:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.2.1</version>
</dependency>
java代码如下:
import org.apache.spark.SparkConf;
import org.apache.spark.deploy.rest.CreateSubmissionResponse;
import org.apache.spark.deploy.rest.RestSubmissionClient;
import org.apache.spark.deploy.rest.SubmissionStatusResponse;
import scala.collection.immutable.HashMap;
/**
 * Demo: submits a Spark application to a standalone cluster through the REST
 * submission gateway (port 6066) and polls the driver state until the job
 * reaches a terminal state.
 *
 * <p>Prerequisite: the application jar must already be uploaded to HDFS so
 * that the cluster can fetch it in cluster deploy mode.
 */
public class test {

    /** REST client used for status polling; created once and reused. */
    private static RestSubmissionClient client =
            new RestSubmissionClient("spark://192.168.1.107:6066");

    public static void main(String[] args) {
        // BUG FIX: the original called test2.submit()/test2.monitory(), but no
        // class test2 exists — the methods live on this class.
        String id = submit();
        // monitory() sleeps 5s per call, so this loop polls every 5 seconds.
        while (!monitory(id)) {
            // keep polling until the driver reaches a terminal state
        }
        System.out.println("spark执行完成");
    }

    /**
     * Submits the Spark application via the REST gateway.
     *
     * @return the submission id assigned by the master
     * @throws IllegalStateException if the submission request fails
     */
    public static String submit() {
        String appResource = "hdfs://cluster1/queryLog-1.0-SNAPSHOT.jar";
        String mainClass = "SelectLog";
        String[] args = {}; // arguments passed to the Spark program

        SparkConf sparkConf = new SparkConf();
        // 6066 is the standalone master's REST submission port
        // (7077 is the legacy RPC submission port).
        sparkConf.setMaster("spark://192.168.1.107:6066");
        sparkConf.set("spark.submit.deployMode", "cluster");
        sparkConf.set("spark.jars", appResource);
        sparkConf.set("spark.driver.supervise", "false");
        // Timestamp suffix keeps app names unique across repeated submissions.
        sparkConf.setAppName("queryLog" + System.currentTimeMillis());

        try {
            CreateSubmissionResponse response = (CreateSubmissionResponse)
                    RestSubmissionClient.run(appResource, mainClass, args, sparkConf,
                            new HashMap<String, String>());
            return response.submissionId();
        } catch (Exception e) {
            // BUG FIX: the original swallowed the exception and then NPE'd on
            // response.submissionId(); fail fast with the cause preserved.
            throw new IllegalStateException("Spark submission failed", e);
        }
    }

    /**
     * Polls the driver state of the given submission once, then sleeps 5
     * seconds so callers can loop on this method directly.
     *
     * @param appId submission id returned by {@link #submit()}
     * @return {@code true} when the driver reached a terminal state
     *         (FINISHED or ERROR), {@code false} otherwise
     */
    public static boolean monitory(String appId) {
        boolean finished = false;
        try {
            SubmissionStatusResponse response =
                    (SubmissionStatusResponse) client.requestSubmissionStatus(appId, true);
            String state = response.driverState();
            // FINISHED and ERROR are treated as terminal here; note that
            // FAILED/KILLED states are not handled — TODO confirm whether the
            // master can report them for this deployment.
            if ("FINISHED".equals(state) || "ERROR".equals(state)) {
                finished = true;
            }
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe cancellation.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            // Best-effort polling: log and report "not finished" so the
            // caller retries on transient REST failures.
            e.printStackTrace();
        }
        return finished;
    }
}