Spark version: 2.4.3 (Scala 2.11)
JDK version: 1.8
Hadoop version: 2.7.7
Cluster mode: standalone cluster
Test language: Java
HTTP server: Jetty
Key points:
1. The driver's Spark version must match the cluster exactly, including the Scala build: with a 2.4.3 / Scala 2.11 cluster, a driver built against Spark 2.4.3 with Scala 2.12 failed the test. Presumably the JDK versions should match as well, but this was not verified. (A quick driver-side check is sketched after this list.)
2. Dependency jars: every jar the application needs must be reachable from all nodes; alternatively, copy the jars directly into each node's spark/jars directory.
3. The application jar itself must also be reachable by all nodes; its path can be supplied through SparkConf (setJars in the code below).
4. The input data must be accessible to all nodes; this test keeps it on HDFS.
5. spark.driver.host must be set to an address that every node can reach, because the executors connect back to the driver.
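A quick way to verify point 1 from the driver side is to print the Spark and Scala versions the driver is actually running before connecting to the cluster. A minimal sketch (the class name VersionCheck is just for illustration; scala.util.Properties is the standard Scala library API, assumed here to be callable from Java through its generated static forwarders):

import org.apache.spark.sql.SparkSession;

public class VersionCheck {
    public static void main(String[] args) {
        // Local mode only; no cluster connection is needed just to read versions.
        SparkSession spark = SparkSession.builder()
                .appName("version_check")
                .master("local[1]")
                .getOrCreate();
        // Should print 2.4.3 to match the cluster.
        System.out.println("Spark version: " + spark.version());
        // Should print 2.11.x to match the cluster's 2_11 build.
        System.out.println("Scala version: " + scala.util.Properties.versionNumberString());
        spark.stop();
    }
}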
Source code:
SparkTest class
package com.ferret.spark;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import java.util.List;
import java.util.Map;
import java.util.Properties;
public class SparkTest {

    private JavaSparkContext sc;
    private SparkSession ss;

    // Application jar shipped to the executors (key point 3); note the
    // escaped backslashes in the Windows path.
    String[] jars = {"D:\\workspace\\idea\\spark-dirver\\build\\libs\\spark-dirver-1.0-SNAPSHOT.jar"};

    public void initSparkContext() {
        SparkConf sparkConf = new SparkConf().setAppName("test_text")
                .setMaster("spark://192.168.15.81:7077")
                // Address the workers use to reach the driver (key point 5).
                .set("spark.driver.host", "192.168.15.75")
                .set("spark.driver.port", "62650")
                .setJars(jars);
        //SparkContext sparkContext = new SparkContext(sparkConf);
        sc = new JavaSparkContext(sparkConf);
    }
    // RDD test: count lines per value of the first comma-separated field.
    public Map<String, Integer> testRdd(String filePath) {
        //String filePath = "hdfs://192.168.15.81:9000/test/text.txt";
        JavaRDD<String> rdd = sc.textFile(filePath, 6);
        JavaPairRDD<String, Integer> rdd1 = rdd
                .mapToPair(s -> new Tuple2<>(s.split(",")[0], 1))
                .reduceByKey((a, b) -> a + b);
        Map<String, Integer> rs = rdd1.collectAsMap();
        return rs;
    }
    public void initSparkSession() {
        SparkConf sparkConf = new SparkConf().setAppName("test_sql")
                .setMaster("spark://192.168.15.81:7077")
                .set("spark.driver.host", "192.168.15.75")
                .set("spark.driver.port", "62650")
                .setJars(jars);
        ss = SparkSession.builder().config(sparkConf).getOrCreate();
    }
    // SQL test: read a MySQL table via JDBC and return the top clusters by row count.
    public List<Row> testSql() {
        Properties properties = new Properties();
        properties.put("driver", "com.mysql.cj.jdbc.Driver");
        properties.put("user", "username");
        properties.put("password", "xxxxx");
        Dataset<Row> dataset = ss.read().jdbc("jdbc:mysql://192.168.15.219:3306/dbname?characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai",
                "jh_cluster_pass", properties);
        return dataset.groupBy("clusterid").count().orderBy(new Column("count").desc()).takeAsList(20);
    }
}
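For reference, a minimal standalone driver (outside Jetty) that exercises the class above against the HDFS test file; the class name SparkTestDemo is just for illustration:

import java.util.Map;

public class SparkTestDemo {
    public static void main(String[] args) {
        SparkTest test = new SparkTest();
        test.initSparkContext();
        // Counts lines per value of the first comma-separated field.
        Map<String, Integer> counts = test.testRdd("hdfs://192.168.15.81:9000/test/text.txt");
        counts.forEach((k, v) -> System.out.println(k + " -> " + v));
    }
}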
Jetty Handler class
package com.ferret.spark;
import com.alibaba.fastjson.JSONObject;
import org.apache.spark.sql.Row;
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.handler.AbstractHandler;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;
import java.util.Map;
public class HttpHandler extends AbstractHandler {

    private SparkTest sparkTest;

    public HttpHandler() {
        // Build the Spark entry points once and reuse them for every request;
        // only one active SparkContext is allowed per JVM. initSparkSession()
        // reuses the context created by initSparkContext() via getOrCreate(),
        // so both the /spark and /sql endpoints work.
        this.sparkTest = new SparkTest();
        sparkTest.initSparkContext();
        sparkTest.initSparkSession();
    }

    @Override
    public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException {
        if (target.equals("/spark")) {
            Map<String, Integer> rs = sparkTest.testRdd("hdfs://192.168.15.81:9000/test/text.txt");
            JSONObject jsonObject = (JSONObject) JSONObject.toJSON(rs);
            write(jsonObject.toJSONString(), response);
            baseRequest.setHandled(true);
        }
        if (target.equals("/sql")) {
            List<Row> rs = sparkTest.testSql();
            StringBuilder sb = new StringBuilder();
            rs.forEach(a -> sb.append("id:" + a.getString(0) + ", count:" + a.getLong(1) + "\n"));
            write(sb.toString(), response);
            baseRequest.setHandled(true);
        }
    }
    private void write(String msg, HttpServletResponse response) {
        PrintWriter writer = null;
        try {
            writer = response.getWriter();
            writer.print(msg);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) writer.close();
        }
    }
}
HttpServer class
package com.ferret.spark;
import org.eclipse.jetty.server.Server;
public class HttpServer {

    private int port;

    public HttpServer(int port) {
        this.port = port;
    }

    public void start() {
        Server server = new Server(port);
        // Register a shutdown hook so Jetty stops when the JVM exits.
        server.setStopAtShutdown(true);
        // Handler that serves all incoming requests.
        server.setHandler(new HttpHandler());
        try {
            server.start();
            server.join();
        } catch (Exception e) {
            System.out.println("Failed to start Jetty");
            e.printStackTrace();
        }
    }
}
Main class
package com.ferret.spark;
public class Main {
    public static void main(String[] args) {
        HttpServer httpServer = new HttpServer(10001);
        httpServer.start();
    }
}
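With the settings above, the server listens on port 10001 on the driver host, so the two endpoints can be hit with any HTTP client, e.g. http://192.168.15.75:10001/spark for the RDD test and http://192.168.15.75:10001/sql for the SQL test. Note that each request runs a full Spark job synchronously, so the response takes as long as the job itself.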