一.pom.xml
<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>2.4.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.12</artifactId>
        <version>2.4.3</version>
    </dependency>
    <dependency>
        <groupId>ru.yandex.clickhouse</groupId>
        <artifactId>clickhouse-jdbc</artifactId>
        <version>0.2.4</version>
    </dependency>
</dependencies>
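Both jobs below assume the database spark_clickhouse and the table t_order_mt already exist on the ClickHouse side. The sketch below prepares that table over plain JDBC; the schema (UInt32/Decimal/DateTime columns, MergeTree engine, partition and ordering key) is only an assumption inferred from the sample rows in section 三, so adjust it to your real table before running.
import java.sql.DriverManager

object create_clickhouse_table {
  def main(args: Array[String]): Unit = {
    // Register the ClickHouse driver and open a plain JDBC connection.
    Class.forName("ru.yandex.clickhouse.ClickHouseDriver")
    val conn = DriverManager.getConnection(
      "jdbc:clickhouse://192.168.174.204:8123/spark_clickhouse", "admin", "root")
    try {
      // Assumed schema: types, engine and keys are guesses based on the sample data, not the real table.
      val ddl =
        """CREATE TABLE IF NOT EXISTS t_order_mt
          |(
          |    id           UInt32,
          |    sku_id       String,
          |    total_amount Decimal(16, 2),
          |    create_time  DateTime
          |)
          |ENGINE = MergeTree()
          |PARTITION BY toYYYYMMDD(create_time)
          |ORDER BY (id, sku_id)""".stripMargin
      conn.createStatement().execute(ddl)
    } finally {
      conn.close()
    }
  }
}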
二.spark_clickhouse_read_data.scala
import org.apache.spark.sql.SparkSession

object spark_clickhouse_read_data {
  def main(args: Array[String]): Unit = {
    //1. Set up the environment
    val spark: SparkSession = sparkUtils.sparkSessionWithMaster("local[*]", "spark_clickhouse_read_data_job")
    //2. Configure the data source
    val sourceConfig: Map[String, String] = Map[String, String](
      "driver"   -> "ru.yandex.clickhouse.ClickHouseDriver",
      "url"      -> "jdbc:clickhouse://192.168.174.204:8123/spark_clickhouse",
      "dbtable"  -> "t_order_mt",
      "user"     -> "admin",
      "password" -> "root"
    )
    //3. Read from the data source
    spark
      .read
      .format("jdbc")
      .options(sourceConfig)
      .load()
      .show(true)
    //4. Release resources
    spark.stop()
  }
}
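If only a slice of the table is needed, the JDBC source also accepts a parenthesized subquery as "dbtable", so the filter runs inside ClickHouse instead of after the whole table has been pulled into Spark. The sketch below reuses the sparkUtils helper from section 四; the predicate and job name are illustrative assumptions, not part of the original job.
import org.apache.spark.sql.SparkSession

object spark_clickhouse_read_pushdown {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = sparkUtils.sparkSessionWithMaster("local[*]", "spark_clickhouse_read_pushdown_job")
    // "dbtable" may be a subquery: ClickHouse evaluates the WHERE clause, Spark only sees the result.
    val pushdownConfig: Map[String, String] = Map[String, String](
      "driver"   -> "ru.yandex.clickhouse.ClickHouseDriver",
      "url"      -> "jdbc:clickhouse://192.168.174.204:8123/spark_clickhouse",
      "dbtable"  -> "(SELECT id, sku_id, total_amount FROM t_order_mt WHERE total_amount > 1000) AS t",
      "user"     -> "admin",
      "password" -> "root"
    )
    spark.read.format("jdbc").options(pushdownConfig).load().show(false)
    spark.stop()
  }
}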
三.spark_clickhouse_write_data.scala
import java.util.Properties

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object spark_clickhouse_write_data {
  def main(args: Array[String]): Unit = {
    //1. Set up the environment
    val spark: SparkSession = sparkUtils.sparkSessionWithMaster("local[*]", "spark_clickhouse_write_data_job")
    //2. Create sample data and build a DataFrame
    val data: DataFrame = spark.createDataFrame(Seq(
      (101, "sku_001", 1000.00, "2020-06-01 12:00:00"),
      (103, "sku_002", 2000.00, "2020-06-01 11:00:00"),
      (104, "sku_004", 2500.00, "2020-06-01 12:00:00"),
      (105, "sku_002", 2000.00, "2020-06-01 13:00:00"),
      (106, "sku_002", 12000.00, "2020-06-01 13:00:00"),
      (109, "sku_002", 600.00, "2020-06-02 12:00:00")
    )).toDF("id", "sku_id", "total_amount", "create_time")
    //3. Connection and write options
    val config: Map[String, String] = Map[String, String](
      "batchsize"      -> "2000",
      "isolationLevel" -> "NONE",
      "numPartitions"  -> "1"
    )
    val url      = "jdbc:clickhouse://192.168.174.204:8123/spark_clickhouse"
    val dbtable  = "t_order_mt"
    val user     = "admin"
    val password = "root"
    val pro = new Properties()
    pro.put("driver", "ru.yandex.clickhouse.ClickHouseDriver")
    pro.put("user", user)
    pro.put("password", password)
    //4. Write the data
    data.write.mode(SaveMode.Append)
      .options(config)
      .jdbc(url, dbtable, pro)
    //5. Release resources
    spark.stop()
  }
}
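The same write can also be expressed through the generic format("jdbc") / option API instead of a java.util.Properties object. The snippet below is an equivalent sketch meant to drop into the main method above; it reuses the data, url, dbtable, user and password values defined there.
    // Equivalent to the Properties-based write above, with every setting passed as an option.
    data.write
      .mode(SaveMode.Append)
      .format("jdbc")
      .option("driver", "ru.yandex.clickhouse.ClickHouseDriver")
      .option("url", url)
      .option("dbtable", dbtable)
      .option("user", user)
      .option("password", password)
      .option("batchsize", "2000")
      .option("isolationLevel", "NONE")
      .option("numPartitions", "1")
      .save()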
四.The hand-rolled sparkUtils helper
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

object sparkUtils {
  // SparkContext for local runs
  def sparkConf(local: String, appName: String): SparkContext = {
    val conf: SparkConf = new SparkConf().setAppName(appName).setMaster(local)
    new SparkContext(conf)
  }
  // SparkContext for cluster runs (master URL is supplied by spark-submit)
  def sparkConfWithNoMaster(appName: String): SparkContext = {
    val conf: SparkConf = new SparkConf().setAppName(appName)
    new SparkContext(conf)
  }
  // SparkSession for local runs
  def sparkSessionWithMaster(local: String, appName: String): SparkSession = {
    SparkSession.builder()
      .appName(appName)
      .master(local)
      .getOrCreate()
  }
  // SparkSession for cluster runs
  def sparkSessionWithNoMaster(appName: String): SparkSession = {
    SparkSession.builder()
      .appName(appName)
      .getOrCreate()
  }
}
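The jobs in sections 二 and 三 call sparkSessionWithMaster for local debugging. When the jar is packaged and submitted to a cluster, where the master URL comes from spark-submit rather than the code, the no-master variant is the one to call, for example:
    val spark: SparkSession = sparkUtils.sparkSessionWithNoMaster("spark_clickhouse_read_data_job")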