Developed in Scala. This is only a simple example of running Spark in local mode; there was no requirement at work to trigger Spark on YARN via HTTP requests, so I have not looked into that for now.
pom.xml
Note that Spring Boot's default logging engine needs to be excluded.
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<slf4j.version>1.7.28</slf4j.version>
<log4j.version>2.12.1</log4j.version>
<scala.version>2.11.12</scala.version>
<spark.version>2.4.0</spark.version>
<hive.version>2.1.1</hive.version>
<!-- overrides the mysql-connector-java version managed by spring-boot-starter-parent -->
<mysql.version>5.1.47</mysql.version>
</properties>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.1.7.RELEASE</version>
<relativePath/>
</parent>
<dependencies>
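<!-- slf4j: no-op binding that discards log output once the default engines are excluded -->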
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-nop</artifactId>
<version>${slf4j.version}</version>
</dependency>
<!-- scala -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<!-- spark -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
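<!-- janino: used by Spark SQL's runtime code generation -->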
<dependency>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
<version>3.0.8</version>
</dependency>
<!-- spring boot -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-aop</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.9</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<fork>true</fork>
</configuration>
</plugin>
</plugins>
</build>
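With the spring-boot-maven-plugin configured (fork runs the application in a separate JVM), the service can be started straight from Maven with mvn spring-boot:run.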
SparkConfig
The parameters could be configured in application.properties / yml; for this test they are simply hard-coded (a property-binding sketch follows the class below).
import java.io.File
import org.apache.spark.sql.SparkSession
import org.springframework.boot.context.properties.ConfigurationProperties
import org.springframework.stereotype.Component

/**
  * created by LiuJinHe 2019/9/25
  */
@Component
@ConfigurationProperties(prefix = "spark-config")
class SparkConfig {
  // hard-coded for this test, so the @ConfigurationProperties binding is not actually exercised
  private val warehouseLocation = new File("spark-warehouse").getAbsolutePath
  private val appName: String = "SparkJob"
  private val master: String = "local"

  def initSparkSession: SparkSession = SparkSession.builder
    .appName(appName)
    .master(master)
    .config("spark.sql.warehouse.dir", warehouseLocation)
    .enableHiveSupport()
    .getOrCreate()
}
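If you do want the values to come from application.properties, a minimal binding sketch is below; the property keys spark-config.app-name and spark-config.master are assumptions, and @BeanProperty generates the JavaBean setters Spring Boot needs for binding:

import scala.beans.BeanProperty
import org.apache.spark.sql.SparkSession
import org.springframework.boot.context.properties.ConfigurationProperties
import org.springframework.stereotype.Component

// application.properties (assumed keys):
//   spark-config.app-name=SparkJob
//   spark-config.master=local
@Component
@ConfigurationProperties(prefix = "spark-config")
class SparkPropsConfig {
  // mutable fields with generated getters/setters so Spring Boot can bind them
  @BeanProperty var appName: String = "SparkJob"
  @BeanProperty var master: String = "local"

  def initSparkSession: SparkSession = SparkSession.builder
    .appName(appName)
    .master(master)
    .enableHiveSupport()
    .getOrCreate()
}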
SpringBootApplication
import org.springframework.boot.SpringApplication
import org.springframework.boot.autoconfigure.SpringBootApplication

/**
  * Spring Boot entry point
  *
  * created by LiuJinHe 2019/9/25
  */
@SpringBootApplication // already combines @EnableAutoConfiguration and @ComponentScan
class CrowdPackageApp

object CrowdPackageApp extends App {
  SpringApplication.run(classOf[CrowdPackageApp], args: _*)
}
RestController
Here SparkJob is written directly as a Scala object rather than instantiated as a bean through @Autowired (a Spring-managed alternative is sketched after SparkJob below).
import com.sm.service.SparkJob
import org.springframework.web.bind.annotation.{CrossOrigin, RequestMapping, RequestMethod, RequestParam, ResponseBody, RestController}

/**
  * Request endpoint
  *
  * created by LiuJinHe 2019/9/25
  */
@RestController
@CrossOrigin
class CrowdController {
  /**
    * Handle the crowd export request
    */
  @RequestMapping(value = Array("/crowd"), method = Array(RequestMethod.GET))
  @ResponseBody
  def exportCrowd(@RequestParam db: String, @RequestParam table: String): String = {
    SparkJob.getGameId(db, table)
  }
}
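With the application running on the default port, the endpoint can be exercised with a request such as (the db and table values here are placeholders):

GET http://localhost:8080/crowd?db=test&table=user_login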
SparkJob
The class that executes the Spark job.
import com.sm.config.SparkConfig
import org.apache.spark.sql.SparkSession

/**
  * created by LiuJinHe 2019/9/25
  */
object SparkJob {
  // A plain Scala object is not managed by Spring, so @Autowired would have
  // no effect here; SparkConfig is instantiated directly instead.
  val sparkConf: SparkConfig = new SparkConfig
  val spark: SparkSession = sparkConf.initSparkSession

  def getGameId(db: String, table: String): String = {
    // Reading from MySQL over JDBC would look like this instead:
    // val url = s"jdbc:mysql://localhost:3306/$db?characterEncoding=UTF-8&serverTimezone=UTC&user=root&password=863863"
    // val prop = new java.util.Properties()
    // val df = spark.read.jdbc(url, table, prop)
    // df.createOrReplaceTempView("tmpTable")
    // spark.sql("select * from tmpTable")
    val df = spark.sql(s"select * from $db.$table limit 10")
    df.show() // prints the sample to the console; show() itself returns Unit
    // build the HTTP response body from the rows rather than from show()'s Unit result
    df.toJSON.collect().mkString("[", ",", "]")
  }
}
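For a more idiomatic Spring setup, the job could instead be a managed bean so that injection actually takes effect; a minimal sketch under that assumption (the name SparkJobService is hypothetical):

import com.sm.config.SparkConfig
import org.springframework.stereotype.Service

// Hypothetical Spring-managed variant: with a single constructor,
// Spring injects the SparkConfig bean automatically.
@Service
class SparkJobService(sparkConfig: SparkConfig) {
  private val spark = sparkConfig.initSparkSession

  def getGameId(db: String, table: String): String = {
    val df = spark.sql(s"select * from $db.$table limit 10")
    df.toJSON.collect().mkString("[", ",", "]")
  }
}

The controller would then take a SparkJobService constructor parameter instead of calling the object directly.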