Integrating Spark with Spring Boot

On integrating Spring Boot with Spark

The problem: I wanted to submit my Spark job with spark-submit, but the job's result had to be displayed on a front-end page, and at the time I didn't know how to do that. It bothered me enough that I'm sharing the approach here: Spring Boot triggers the spark-submit wrapper script over SSH, the Spark job writes its result to a Redis cluster, and the web application can then read it back from Redis to render the page.

Here is my Spark job.
package sparkSql

import org.apache.spark.SparkConf
import java.util

import org.apache.commons.pool2.impl.GenericObjectPoolConfig
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import redis.clients.jedis.{HostAndPort, JedisCluster}

import scala.collection.mutable.ArrayBuffer
object sparkReadHdfs {
def main(args: Array[String]): Unit = {
// these are the parameters passed into the SQL condition (the WHERE clause)
// val cond = args(0).split("_")
// val startTime = cond(0)
// val endTime = cond(1)
val conf = new SparkConf()
.setMaster("local[*]")
.setAppName("sparkReadHdfs")
.set("spark.default.parallelism", "240")
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
.set("spark.shuffle.consolidateFiles", "true")
.set("spark.shuffle.memoryFraction", "0.2")
.set("spark.shuffle.file.buffer", "32kb")
//.set("spark.debug.maxToStringFields", "1000")
val spark = SparkSession.builder().config(conf).getOrCreate()
/**
 * Read the parquet file on HDFS and convert it to an RDD,
 * then RDD => DataFrame
 */
val rr = spark.read.parquet("hdfs://cdh02:8020/home/tangweihao/data_dir/2019-09-23/part-00000-198479b0-483d-4c6c-af6d-6a7dcda039cd-c000.snappy.parquet").rdd.repartition(1000)
import spark.implicits._

val ss = rr.map(line => {
val arr = line.toString().split("\\|")
val requestIdAcJnl = arr(0)
val nodeId = arr(1)
val startDate = arr(2)
val startTime = arr(3)
val startMSec = arr(4)
val logInTuSkShuKnKISHcd = arr(5)
val logInTuSkKuZbTNcd = arr(6)
val logInTuSkKuZno = arr(7)
val logInTuskLoginId = arr(8)
val knrShShuKnKiShCd = arr(9)
val knrShShZkBtn = arr(10)
val knrShLogInId = arr(11)
val chkBn = arr(12)
val ipAdrs = arr(13)
val identityNo = arr(14)
val sentServer = arr(15)
val receivedServer = arr(16)
val host = arr(17)
val requestContentType = arr(18)
val requestContentLength = arr(19)
val requestParameters = arr(20)
val requestUri = arr(21)
val migRcd = arr(22)
val screenId = arr(23)
val requestQueryval = arr(24)
val requestSessionId = arr(25)
val requestHttpMethod = arr(26)
val requestCookie = arr(27)
val responseContentLength = arr(28)
val responseCookie = arr(29)
val responseStatus = arr(30)
val responseHttpHeader = arr(31)
val responseRedirectLocation = arr(32)
val plShrKbn1 = arr(33)
val blShrSts1 = arr(34)
val msgCode11 = arr(35)
val msgCode12 = arr(36)
val plShrKbn2 = arr(37)
val blShrSts2 = arr(38)
val msgCode21 = arr(39)
val msgCode22 = arr(40)
val returnStatusKbn = arr(41)
val endDate = arr(42)
val endTime = arr(43)
val endMSec = arr(44)
val performance = arr(45)
val starSendFlg = arr(46)
val registration = arr(47)
val registerDate = arr(48)
val createTs = arr(49)
val updateTs = arr(50)
val requestId = arr(51)
val device = arr(52)
Row(requestIdAcJnl,nodeId,startDate,startTime,startMSec,logInTuSkShuKnKISHcd,logInTuSkKuZbTNcd,logInTuSkKuZno,logInTuskLoginId,knrShShuKnKiShCd,knrShShZkBtn,knrShLogInId,chkBn,ipAdrs,identityNo,sentServer,receivedServer,host,requestContentType,requestContentLength,requestParameters,requestUri,migRcd,screenId,requestQueryval,requestSessionId,requestHttpMethod,requestCookie,responseContentLength,responseCookie,responseStatus,responseHttpHeader,responseRedirectLocation,plShrKbn1,blShrSts1,msgCode11,msgCode12,plShrKbn2,blShrSts2,msgCode21,msgCode22,returnStatusKbn,endDate,endTime,endMSec,performance,starSendFlg,registration,registerDate,createTs,updateTs,requestId,device)
})
val schema = StructType(Array(
StructField("requestIdAcJnl", StringType, true),
StructField("nodeId", StringType, true),
StructField("startDate", StringType, true),
StructField("startTime", StringType, true),
StructField("startMSec", StringType, true),
StructField("logInTuSkShuKnKISHcd", StringType, true),
StructField("logInTuSkKuZbTNcd", StringType, true),
StructField("logInTuSkKuZno", StringType, true),
StructField("logInTuskLoginId", StringType, true),
StructField("knrShShuKnKiShCd", StringType, true),
StructField("knrShShZkBtn", StringType, true),
StructField("knrShLogInId", StringType, true),
StructField("chkBn", StringType, true),
StructField("ipAdrs", StringType, true),
StructField("identityNo", StringType, true),
StructField("sentServer", StringType, true),
StructField("receivedServer", StringType, true),
StructField("host", StringType, true),
StructField("requestContentType", StringType, true),
StructField("requestContentLength", StringType, true),
StructField("requestParameters", StringType, true),
StructField("requestUri", StringType, true),
StructField("migRcd", StringType, true),
StructField("screenId", StringType, true),
StructField("requestQueryval", StringType, true),
StructField("requestSessionId", StringType, true),
StructField("requestHttpMethod", StringType, true),
StructField("requestCookie", StringType, true),
StructField("responseContentLength", StringType, true),
StructField("responseCookie", StringType, true),
StructField("responseStatus", StringType, true),
StructField("responseHttpHeader", StringType, true),
StructField("responseRedirectLocation", StringType, true),
StructField("plShrKbn1", StringType, true),
StructField("blShrSts1", StringType, true),
StructField("msgCode11", StringType, true),
StructField("msgCode12", StringType, true),
StructField("plShrKbn2", StringType, true),
StructField("blShrSts2", StringType, true),
StructField("msgCode21", StringType, true),
StructField("msgCode22", StringType, true),
StructField("returnStatusKbn", StringType, true),
StructField("endDate", StringType, true),
StructField("endTime", StringType, true),
StructField("endMSec", StringType, true),
StructField("performance", StringType, true),
StructField("starSendFlg", StringType, true),
StructField("registration", StringType, true),
StructField("registerDate", StringType, true),
StructField("createTs", StringType, true),
StructField("updateTs", StringType, true),
StructField("requestId", StringType, true),
StructField("device", StringType, true)
))
val df = spark.createDataFrame(ss, schema)
// register a temporary view
df.createOrReplaceTempView("t")
// this statement actually runs the SQL
val sql: DataFrame = spark.sql("select requestIdAcJnl from t as REQUESTIDACJNL")
sql.show()

    //        sql.write.format("jdbc")
    //                .option("url","jdbc:mysql://192.168.168.48:3306/cloudera_manager?useSSL=false")
    //                .option("dbtable","request_di_ac_jnl")
    //                .option("driver","com.mysql.jdbc.Driver")
    //                .option("user","cdh")
    //                .option("password","Cdh@pwd:MySQL57!@#$")
    //                .save()

    //val sqlRDD: RDD[Row] = sql.rdd.repartition(1)
    /**
      * Save the Spark SQL query result to the Redis cluster
      * 03-06
      */
    sql.foreachPartition(iter => {
        val password = "Hyron@redis:cluster"
        val timeout = 10000
        val database = 0
        val config = new GenericObjectPoolConfig()
        val hostAndPort1 = new HostAndPort("cdh03", 7380)
        val hostAndPort2 = new HostAndPort("cdh03", 7381)
        val hostAndPort3 = new HostAndPort("cdh04", 7380)
        val hostAndPort4 = new HostAndPort("cdh04", 7381)
        val hostAndPort5 = new HostAndPort("cdh05", 7380)
        val hostAndPort6 = new HostAndPort("cdh05", 7381)
        val hostAndPort7 = new HostAndPort("cdh06", 7380)
        val hostAndPort8 = new HostAndPort("cdh06", 7381)
        val hostAndPortSet = new util.HashSet[HostAndPort]()
        hostAndPortSet.add(hostAndPort1)
        hostAndPortSet.add(hostAndPort2)
        hostAndPortSet.add(hostAndPort3)
        hostAndPortSet.add(hostAndPort4)
        hostAndPortSet.add(hostAndPort5)
        hostAndPortSet.add(hostAndPort6)
        hostAndPortSet.add(hostAndPort7)
        hostAndPortSet.add(hostAndPort8)

        // var jedis = new JedisCluster(hostAndPortSet, timeout, timeout, 1024, password, config)
        // iter.foreach(row => {
        //     jedis.lpush("requestIdAcJnl", row.toString())
        // })

        // collect this partition's rows and push them to Redis as a single list element
        val arr1 = new ArrayBuffer[String]()
        iter.foreach(row => {
            arr1.append(row.toString())
        })
        val jedis = new JedisCluster(hostAndPortSet, timeout, timeout, 1024, password, config)
        jedis.lpush("requestIdAcJnl-Test3", arr1.toString())
})
}
}

The Spring Boot application

Controller layer
package com.example.demo.controller;

import com.example.demo.service.RemoteShellTool;
import com.example.demo.service.SparkSqlTestService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpRequest;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletRequest;
import java.util.Map;

@RestController
public class DemoController {

    // assumes RemoteShellTool is registered as a Spring bean (see the configuration sketch below, after the RemoteShellTool class)
    @Autowired
    RemoteShellTool remoteShellTool;

    @RequestMapping("/mm")
    // public Map<String, Object> calculate(HttpRequest req) {
    //     return sqlTest.getResult();
    // }
    public String sqlResult(HttpServletRequest req) {
        // String startTime = req.getParameter("startTime");
        // String endTime = req.getParameter("endTime");
        // String cond = startTime + "_" + endTime;
        // remoteShellTool.exec("sh /home/tangweihao/sh/sparkHH.sh " + cond);
        remoteShellTool.exec("sh /home/tangweihao/sh/sparkHH.sh");
        return "success";
    }
}

Service layer
package com.example.demo.service;
import ch.ethz.ssh2.Connection;
import ch.ethz.ssh2.Session;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;

public class RemoteShellTool {
private Connection conn;
private String ipAddr;
private String charset = Charset.defaultCharset().toString();
private String userName;
private String password;

public RemoteShellTool( String ipAddr, String userName, String password, String charset) {
    this.ipAddr = ipAddr;
    this.userName = userName;
    this.password = password;
    if(charset !=null){
        this.charset = charset;
    }
}
public boolean login() throws IOException {
    conn = new Connection(ipAddr);
    // connect to the remote host
    conn.connect();
    // authenticate with the username and password
    return  conn.authenticateWithPassword(userName,password);
}

public String exec(String cmds) {
    InputStream in = null;
    String result = "";
    try {
        if(this.login()){
            // open a session and run the command
            Session session = conn.openSession();
            session.execCommand(cmds);
            in = session.getStdout();
            result = this.processStdout(in,this.charset);
            conn.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return result;
}

public String processStdout(InputStream in, String charset) {
    byte[] buffer = new byte[1024];
    StringBuilder sb = new StringBuilder();
    try {
        int len;
        // only append the bytes actually read on each iteration
        while ((len = in.read(buffer)) != -1) {
            sb.append(new String(buffer, 0, len, charset));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return sb.toString();
}

public static void main(String[] args) {
    RemoteShellTool rst = new RemoteShellTool("192.168.168.222","tangweihao","bigdata@twh312","utf-8");
   // System.out.println(rst.exec("/data/dcos/241client/HDFS/hadoop/sbin/hadoop-daemon.sh start datanode"));

}

}
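
RemoteShellTool is a plain class with a parameterized constructor, so component scanning will not pick it up; for the @Autowired field in DemoController to work it has to be registered as a Spring bean explicitly. Below is a minimal configuration sketch, reusing the host and credentials from the main method above; the class name ShellToolConfig is illustrative.

package com.example.demo.config;

import com.example.demo.service.RemoteShellTool;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class ShellToolConfig {

    // expose RemoteShellTool as a bean so it can be @Autowired into controllers and services;
    // the host, username and password are the illustrative values used elsewhere in this post
    @Bean
    public RemoteShellTool remoteShellTool() {
        return new RemoteShellTool("192.168.168.222", "tangweihao", "bigdata@twh312", "utf-8");
    }
}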

package com.example.demo.service;

import org.springframework.stereotype.Service;

import java.util.HashMap;
import java.util.Map;

@Service
public class SparkSqlTestService {

    public Map<String, Object> getResult() {
        Map<String, Object> map = new HashMap<>();
        // RemoteShellTool is a Java utility class that logs into a Linux host over SSH.
        // Replace the host with one of your cluster's hostnames,
        // "tangweihao" with your username and "bigdata@twh312" with your password.
        RemoteShellTool rst = new RemoteShellTool("192.168.168.222", "tangweihao", "bigdata@twh312", "utf-8");
        // sparkHH.sh is the shell script that wraps the spark-submit command for the job above
        rst.exec("sh /home/tangweihao/sh/sparkHH.sh");
        return map;
    }
}
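
The Spark job above pushes its result into the Redis cluster under the key requestIdAcJnl-Test3, so the remaining piece for showing it on a page is reading that list back inside Spring Boot. Below is a minimal sketch using the same Jedis cluster client as the Scala job; the class name SparkResultService, the single seed node cdh03:7380 and the hard-coded password are assumptions taken from the example cluster, so adjust them to your own environment, and it assumes the redis.clients:jedis dependency has been added to the Spring Boot pom.

package com.example.demo.service;

import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.springframework.stereotype.Service;
import redis.clients.jedis.HostAndPort;
import redis.clients.jedis.JedisCluster;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

@Service
public class SparkResultService {

    // read back the list the Spark job wrote with lpush("requestIdAcJnl-Test3", ...)
    public List<String> getSparkResult() {
        Set<HostAndPort> nodes = new HashSet<>();
        // one reachable node is enough; the cluster client discovers the remaining slots itself
        nodes.add(new HostAndPort("cdh03", 7380));
        try (JedisCluster jedis = new JedisCluster(nodes, 10000, 10000, 1024,
                "Hyron@redis:cluster", new GenericObjectPoolConfig())) {
            // 0 to -1 returns the whole list
            return jedis.lrange("requestIdAcJnl-Test3", 0, -1);
        } catch (Exception e) {
            e.printStackTrace();
            return Collections.emptyList();
        }
    }
}

A controller method can simply return this list from a @RestController mapping; Spring Boot serializes it to JSON, which is what the front-end page finally renders.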

Application layer
package com.example.demo;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

@SpringBootApplication
public class DemoApplication {
public static void main(String[] args) {
SpringApplication.run(DemoApplication.class, args);
}
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<parent>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-parent</artifactId>
		<version>2.1.6.RELEASE</version>
	</parent>

	<groupId>com.example</groupId>
	<artifactId>spring-boot-spark</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<name>demo</name>
	<description>Demo project for Spring Boot</description>

<properties>
	<java.version>1.8</java.version>
	<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
	<scala.version>2.11.8</scala.version>
	<spark.version>2.2.1</spark.version>
</properties>

<repositories>
	<repository>
		<id>cdh</id>
		<name>Cloudera Rel Repository</name>
		<url>https://repository.cloudera.com/content/repositories/releases/</url>
	</repository>

	<repository>
		<id>cloudera</id>
		<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
	</repository>

</repositories>

<dependencies>

	<!-- https://mvnrepository.com/artifact/org.apache.kudu/kudu-client -->
	<dependency>
		<groupId>org.apache.kudu</groupId>
		<artifactId>kudu-client</artifactId>
		<version>1.6.0-cdh5.14.0</version>
		<scope>test</scope>
	</dependency>


	<!-- https://mvnrepository.com/artifact/org.apache.kudu/kudu-client-tools -->
	<dependency>
		<groupId>org.apache.kudu</groupId>
		<artifactId>kudu-client-tools</artifactId>
		<version>1.6.0-cdh5.14.0</version>
	</dependency>


	<!-- https://mvnrepository.com/artifact/org.apache.kudu/kudu-spark2 -->
	<dependency>
		<groupId>org.apache.kudu</groupId>
		<artifactId>kudu-spark2_2.11</artifactId>
		<version>1.6.0-cdh5.14.0</version>
	</dependency>


	<dependency>
		<groupId>ch.ethz.ganymed</groupId>
		<artifactId>ganymed-ssh2</artifactId>
		<version>262</version>
	</dependency>


	<dependency>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-thymeleaf</artifactId>
	</dependency>

	<dependency>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-web</artifactId>
	</dependency>

	<!--https://mvnrepository.com/-->
	<dependency>
		<groupId>org.scala-lang</groupId>
		<artifactId>scala-library</artifactId>
		<version>${scala.version}</version>
	</dependency>

	<dependency>
		<groupId>org.apache.spark</groupId>
		<artifactId>spark-core_2.11</artifactId>
		<version>${spark.version}</version>
	</dependency>




	<dependency>
		<groupId>org.apache.hadoop</groupId>
		<artifactId>hadoop-client</artifactId>
		<version>2.6.0-cdh5.14.2</version>
		<exclusions>
			<exclusion>
				<groupId>org.slf4j</groupId>
				<artifactId>slf4j-log4j12</artifactId>
			</exclusion>
			<exclusion>
				<groupId>log4j</groupId>
				<artifactId>log4j</artifactId>
			</exclusion>
		</exclusions>

	</dependency>

	<!-- core dependency for Spark SQL development -->
	<dependency>
		<groupId>org.apache.spark</groupId>
		<artifactId>spark-sql_2.11</artifactId>
		<version>${spark.version}</version>
	</dependency>

	<dependency>
		<groupId>org.codehaus.janino</groupId>
		<artifactId>janino</artifactId>
		<version>3.0.8</version>
	</dependency>

	<!-- required for Spark SQL to integrate with Hive -->
	<dependency>
		<groupId>org.apache.spark</groupId>
		<artifactId>spark-hive_2.11</artifactId>
		<version>${spark.version}</version>
	</dependency>

	<!-- core dependency for Spark Streaming applications; provides the basic data sources -->
	<dependency>
		<groupId>org.apache.spark</groupId>
		<artifactId>spark-streaming_2.11</artifactId>
		<version>${spark.version}</version>
	</dependency>


	<dependency>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-test</artifactId>
		<scope>test</scope>
	</dependency>


</dependencies>

<build>
	<plugins>
		<plugin>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-maven-plugin</artifactId>
			<executions>
				<execution>
					<goals>
						<goal>repackage</goal>
					</goals>
				</execution>
			</executions>
		</plugin>
	</plugins>
</build>

</project>