Gradle
- 依赖:JDK1.8及以上
为什么写Gradle:
Gradle和Maven是我们使用IDEA的依赖管理工具,是我们开发者必不可少需要接触的,Gradle相较于Maven是更新的管理工具,资料较少,公司要求使用scala语言和gradle开发,头疼了好久,终于下定决心用了一下午做了下面的总结。
Gradle安装部署网上的太多,我就总结一下难点,现在网上比较少的是gradle打包scala代码,我这里开源一下自己的代码配置,直接build即可打包,注意修改自己的main方法所在文件,如果有难点可以在博客留言,我会及时回复。
下面是Flink的配置,Spark同理
Flink-gradle
参考配置
- Flink-Build.gradle
plugins {
    id 'java'
    id 'scala'
    id 'maven-publish'
    id 'idea'
    id "com.github.johnrengelman.shadow" version "4.0.2"
}

group 'com.gtja'
version '1.0.0'

// NOTE: 'java', 'scala' and 'idea' are already applied via the plugins {} block
// above, so the redundant "apply plugin" statements were removed.

sourceCompatibility = 1.8
targetCompatibility = 1.8

configurations {
    provided
}

repositories {
    mavenLocal()
    mavenCentral()
    // Reuse artifacts already downloaded by a local Maven installation.
    maven {
        url '/Users/ailian/Documents/export/server/apache-maven-3.8.1/repository'
    }
}

dependencies {
    // implementation fileTree(dir: 'lib', include: '*.jar')

    /* Scala */
    implementation 'org.scala-lang:scala-library:2.12.8'
    testImplementation group: 'org.scalatest', name: 'scalatest_2.12', version: '3.2.10'
    implementation group: 'org.scala-lang.modules', name: 'scala-xml_2.12', version: '2.0.1'

    /* Commons */
    implementation group: 'commons-io', name: 'commons-io', version: '2.11.0'

    /* Flink 1.12.0 */
    // Base dependencies. Both the Java and the Scala APIs are configured here
    // (the project mixes Java learning code and Scala production code);
    // normally only one of the two is needed.
    // Reference: https://ci.apache.org/projects/flink/flink-docs-release-1.12/zh/dev/project-configuration.html
    implementation group: 'org.apache.flink', name: 'flink-scala_2.12', version: '1.12.0'
    implementation group: 'org.apache.flink', name: 'flink-java', version: '1.12.0'
    compileOnly group: 'org.apache.flink', name: 'flink-streaming-scala_2.12', version: '1.12.0' // Scala
    compileOnly group: 'org.apache.flink', name: 'flink-streaming-java_2.12', version: '1.12.0' // Java

    // Flink client
    implementation group: 'org.apache.flink', name: 'flink-clients_2.12', version: '1.12.0'

    // Flink Table API
    implementation group: 'org.apache.flink', name: 'flink-table-api-scala-bridge_2.12', version: '1.12.0'
    compileOnly group: 'org.apache.flink', name: 'flink-table-api-java-bridge_2.12', version: '1.12.0'
    // Blink planner (the default planner since Flink 1.11).
    testImplementation group: 'org.apache.flink', name: 'flink-table-planner-blink_2.12', version: '1.12.0'
    compileOnly group: 'org.apache.flink', name: 'flink-table-common', version: '1.12.0'

    // Flink CEP
    compileOnly group: 'org.apache.flink', name: 'flink-cep_2.12', version: '1.12.0'

    // Kafka connectors.
    // FIX: the "_2.xx" suffix is the Scala version the connector is compiled
    // against and must match scala-library above (2.12) — it is NOT the Kafka
    // broker version. The _2.12 connector still talks to a kafka_2.11-1.0.0
    // broker. The original build mixed _2.11 connectors with 2.12 Scala, which
    // causes binary-incompatibility errors at runtime.
    implementation group: 'org.apache.flink', name: 'flink-connector-kafka_2.12', version: '1.12.0'
    compileOnly group: 'org.apache.flink', name: 'flink-sql-connector-kafka_2.12', version: '1.12.0'
    compileOnly group: 'org.apache.flink', name: 'flink-connector-jdbc_2.12', version: '1.12.0'
    testImplementation group: 'org.apache.flink', name: 'flink-csv', version: '1.12.0'
    testImplementation group: 'org.apache.flink', name: 'flink-json', version: '1.12.0'

    /* Hive */
    compileOnly group: 'org.apache.flink', name: 'flink-connector-hive_2.12', version: '1.12.0'
    implementation group: 'org.apache.hive', name: 'hive-metastore', version: '2.1.0'
    // implementation group: 'org.apache.hive', name: 'hive-exec', version: '2.1.0'

    /* Hadoop */
    // compileOnly group: 'org.apache.flink', name: 'flink-shaded-hadoop-2-uber', version: '2.7.5-10.0'

    /* HBase */
    implementation group: 'org.apache.hbase', name: 'hbase-client', version: '2.1.0'

    /* MySQL */
    implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.13'

    /* Vert.x — high-performance async toolkit */
    implementation group: 'io.vertx', name: 'vertx-core', version: '3.9.0'
    implementation group: 'io.vertx', name: 'vertx-jdbc-client', version: '3.9.0'
    implementation group: 'io.vertx', name: 'vertx-redis-client', version: '3.9.0'

    /* Logging */
    testImplementation group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.7.7'
    implementation group: 'log4j', name: 'log4j', version: '1.2.17'

    /* Misc */
    testImplementation group: 'org.pentaho', name: 'pentaho-aggdesigner-algorithm', version: '5.1.5-jhyde'
    implementation group: 'com.alibaba', name: 'fastjson', version: '1.2.44'
    compileOnly group: 'org.projectlombok', name: 'lombok', version: '1.18.2'
}

jar {
    // See https://docs.gradle.org/current/dsl/org.gradle.api.tasks.bundling.Jar.html
    archivesBaseName = 'Example' // base name of the produced jar
    manifest { // jar manifest configuration
        attributes(
                "Manifest-Version": 1.0,
                'Main-Class': 'com.gtja.main.Flinkdemo' // fully qualified class containing main()
        )
    }
    // Duplicate-file handling: with 'include' the last duplicate encountered wins.
    duplicatesStrategy = 'include'
    // Build a fat jar: unpack every runtime dependency into the jar itself.
    from {
        configurations.runtimeClasspath.collect {
            it.isDirectory() ? it : zipTree(it)
        }
    }
}
spark-gradle
- 参考配置
plugins {
    id 'scala'
    id 'maven-publish'
    id 'idea'
    id "com.github.johnrengelman.shadow" version "4.0.2"
}

group 'com.gtja'
version '1.0.0'

// NOTE: 'scala' and 'idea' are already applied via the plugins {} block above,
// so the redundant "apply plugin" statements were removed.

sourceCompatibility = 1.8
targetCompatibility = 1.8

configurations {
    provided
}

repositories {
    mavenLocal()
    mavenCentral()
    // Reuse artifacts already downloaded by a local Maven installation.
    maven {
        url '/Users/ailian/Documents/export/server/apache-maven-3.8.1/repository'
    }
}

dependencies {
    implementation group: 'commons-io', name: 'commons-io', version: '2.11.0'
    // implementation fileTree(dir: 'lib', include: '*.jar')

    // Scala 2.11 — must match the "_2.11" suffix of every Spark artifact below.
    implementation 'org.scala-lang:scala-library:2.11.12'

    // https://mvnrepository.com/artifact/org.apache.spark/spark-core
    implementation 'org.apache.spark:spark-core_2.11:2.4.0'
    compileOnly 'org.apache.spark:spark-sql_2.11:2.4.0'
    // https://mvnrepository.com/artifact/org.apache.spark/spark-streaming
    implementation 'org.apache.spark:spark-streaming_2.11:2.4.0'

    // Testing (Scalatest for the library tests)
    testImplementation 'junit:junit:4.12'
    testImplementation 'org.scalatest:scalatest_2.11:3.0.8'
    // scala-xml is needed at test runtime
    testRuntimeOnly 'org.scala-lang.modules:scala-xml_2.11:1.2.0'
}

jar {
    // See https://docs.gradle.org/current/dsl/org.gradle.api.tasks.bundling.Jar.html
    archivesBaseName = 'Example' // base name of the produced jar
    manifest { // jar manifest configuration
        attributes(
                "Manifest-Version": 1.0,
                // FIX: Main-Class must be a fully qualified CLASS name, not a
                // package. The original value 'com.gtja.main' is a package and
                // would make `java -jar` fail; use the entry class that the
                // spark-submit examples in this document run.
                'Main-Class': 'com.gtja.main.SparkWordCount'
        )
    }
    // Duplicate-file handling: with 'include' the last duplicate encountered wins.
    duplicatesStrategy = 'include'
    // Build a fat jar: unpack every runtime dependency into the jar itself.
    from {
        configurations.runtimeClasspath.collect {
            it.isDirectory() ? it : zipTree(it)
        }
    }
}
- 生产执行
# Production run: YARN cluster mode. In cluster mode the driver flags below are
# required; keep the memory:core ratio at roughly 2G per core; the
# --num-executors flag is not needed here.
# Deploy mode: cluster (the driver runs inside the YARN cluster).
spark-submit \
--master yarn \
--deploy-mode cluster \
--driver-memory 8G \
--driver-cores 4 \
--executor-memory 8G \
--executor-cores 4 \
--class com.gtja.main.Test01 xxx.jar
- 测试
# Test run on YARN (default client deploy mode) with a minimal footprint:
# a single executor with 1 core and 1G of memory, running the word-count
# entry class from the packaged fat jar.
spark-submit \
--master yarn \
--num-executors 1 \
--executor-cores 1 \
--executor-memory 1G \
--class com.gtja.main.SparkWordCount \
/root/wsy/lib/SparkWordCount-1.0.0.jar