通过http提供spark计算服务

spark版本

	2.4.3,内核版本2_11

jdk版本

	1.8

hadoop版本

	2.7.7

集群方式

	standalone cluster

测试语言

	java

Http服务

	jetty

要点

	1、driver的spark版本必须和集群一致,内核版本也要一致,如集群版本2.4.3 内核2_11, driver spark版本为2.4.3,内核2_12,测试失败;猜测jdk版本也要保持一致,未进行验证;
	2、依赖包:程序执行所需的依赖包必须能够让所有的节点访问,也可以将jar包直接放到节点的 spark/jars目录下
	3、运行程序也需要能够让所有节点获取到,可以通过SparkConf设置运行程序路径
	4、数据需要让所有节点都能访问到,本测试采用hdfs存储数据
	5、driver host参数的设置,必须要各节点能够访问到driver

源码:

SparkTest类
package com.ferret.spark;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

import java.util.List;
import java.util.Map;
import java.util.Properties;

public class SparkTest {
private JavaSparkContext sc;
private SparkSession ss;
String[] jars = {“D:\workspace\idea\spark-dirver\build\libs\spark-dirver-1.0-SNAPSHOT.jar”};
public void initSparkContext() {
SparkConf sparkConf = new SparkConf().setAppName(“test_text”)
.setMaster(“spark://192.168.15.81:7077”)
.set(“spark.driver.host”, “192.168.15.75”)
.set(“spark.driver.port”, “62650”)
.setJars(jars);
//SparkContext sparkContext = new SparkContext(sparkConf);
sc = new JavaSparkContext(sparkConf);
}
// rdd测试
public Map<String, Integer> testRdd(String filePath) {
//String filePath = “hdfs://192.168.15.81:9000/test/text.txt”;
JavaRDD rdd = sc.textFile(filePath, 6);
JavaPairRDD<String, Integer> rdd1 = rdd.mapToPair(s -> new Tuple2<>(s.split(",")[0], 1)).reduceByKey((a, b) -> a + b);
Map<String, Integer> rs = rdd1.collectAsMap();
return rs;
}
public void initSparkSession() {
SparkConf sparkConf = new SparkConf().setAppName(“test_sql”)
.setMaster(“spark://192.168.15.81:7077”)
.set(“spark.driver.host”, “192.168.15.75”)
.set(“spark.driver.port”, “62650”)
.setJars(jars);
ss = SparkSession.builder().config(sparkConf).getOrCreate();
}
// sql测试
public List testSql() {
Properties properties = new Properties();
properties.put(“dirver”,“com.mysql.cj.jdbc.Driver”);
properties.put(“user”, “username”);
properties.put(“password”, “xxxxx”);
Dataset dataset = ss.read().jdbc(“jdbc:mysql://192.168.15.219:3306/dbname?characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai”,
“jh_cluster_pass”,properties);
return dataset.groupBy(“clusterid”).count().orderBy(new Column(“count”).desc()).takeAsList(20);
}
}

Jetty Handler类

package com.ferret.spark;

import com.alibaba.fastjson.JSONObject;
import org.apache.spark.sql.Row;
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.handler.AbstractHandler;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;
import java.util.Map;

/**
 * Routes HTTP requests to Spark jobs:
 * {@code /spark} runs the RDD word count, {@code /sql} runs the JDBC aggregation.
 */
public class HttpHandler extends AbstractHandler {
    private final SparkTest sparkTest;

    public HttpHandler() {
        this.sparkTest = new SparkTest();
        // NOTE(review): with initSparkContext() disabled, "/spark" will hit a
        // null JavaSparkContext — presumably only one context may exist per
        // JVM alongside the SparkSession; confirm before enabling both routes.
        //sparkTest.initSparkContext();
        sparkTest.initSparkSession();
    }

    @Override
    public void handle(String target, Request baseRequest, HttpServletRequest request,
                       HttpServletResponse response) throws IOException, ServletException {
        if (target.equals("/spark")) {
            // Fixed: was testRDD(...), which does not exist — the method is testRdd.
            Map<String, Integer> rs = sparkTest.testRdd("hdfs://192.168.15.81:9000/test/text.txt");
            JSONObject jsonObject = (JSONObject) JSONObject.toJSON(rs);
            write(jsonObject.toJSONString(), response);
            // Tell Jetty the request was consumed so no further handler runs.
            baseRequest.setHandled(true);
        }
        if (target.equals("/sql")) {
            List<Row> rs = sparkTest.testSql();
            // StringBuilder: single-threaded use, no need for StringBuffer locking.
            StringBuilder sb = new StringBuilder();
            rs.forEach(a -> sb.append("id:").append(a.getString(0))
                    .append(", count:").append(a.getLong(1)).append("\n"));
            write(sb.toString(), response);
            baseRequest.setHandled(true);
        }
    }

    /** Writes {@code msg} as the response body, closing the writer afterwards. */
    private void write(String msg, HttpServletResponse response) {
        try (PrintWriter writer = response.getWriter()) {
            writer.print(msg);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

HttpServer类
package com.ferret.spark;

import org.eclipse.jetty.server.Server;

/**
 * Minimal embedded Jetty server that exposes the Spark test jobs over HTTP.
 */
public class HttpServer {
    private int port;

    /**
     * @param port TCP port the embedded Jetty server listens on
     */
    public HttpServer(int port) {
        this.port = port;
    }

    /**
     * Builds the Jetty server, installs {@link HttpHandler} for every request,
     * starts listening, and blocks the calling thread until shutdown.
     */
    public void start() {
        Server jetty = new Server(port);
        // Register a JVM shutdown hook so Jetty stops cleanly on exit.
        jetty.setStopAtShutdown(true);
        // One handler serves all incoming requests.
        jetty.setHandler(new HttpHandler());
        try {
            jetty.start();
            jetty.join(); // block here until the server is stopped
        } catch (Exception e) {
            System.out.println("启动jetty失败");
            e.printStackTrace();
        }
    }
}

Main

package com.ferret.spark;

public class Main {
public static void main(String[] args) {
HttpServer httpServer = new HttpServer(10001);
httpServer.start();
}
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值