1. Spark读取MySQL数据
1. spark.read.jdbc()
/** Reads table `test01` from the local MySQL `test` database through
  * `spark.read.jdbc` and prints the `id` column of every row whose `id` is
  * at least 3.
  *
  * @param args command-line arguments (unused)
  */
def main(args: Array[String]): Unit = {
  val spark = SparkSession.builder().appName("sparksql").master("local").getOrCreate()
  try {
    // JDBC credentials are passed to the driver through a Properties object.
    val prop = new Properties()
    prop.put("user", "root")
    prop.put("password", "863863")
    val url = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
    // show() returns Unit, so its result is intentionally not bound to a name
    // (the original bound it to a val called `dataFrame`, which was misleading).
    spark.read.jdbc(url, "test01", prop).select("id").where("id >= 3").show()
  } finally {
    // Stop the session even when the JDBC read throws.
    spark.stop()
  }
}
2. spark.read.format().option().load()
/** Reads table `test01` from MySQL with the generic
  * `spark.read.format("jdbc").option(...).load()` API, registers the result as
  * a temporary view and queries it with Spark SQL.
  */
object SparkReadMysql {

  /** Entry point: loads `test01`, then prints the rows whose `id` is at least 3.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("sparksql")
      .master("local")
      .getOrCreate()
    try {
      // URL parameters:
      //   useUnicode=true&characterEncoding=UTF-8 -> character encoding
      //   serverTimezone=UTC                      -> time zone
      val dataDF = spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC")
        .option("dbtable", "test01")
        .option("user", "root")
        .option("password", "863863")
        .load()
      // Expose the DataFrame under a name that SQL text can refer to.
      dataDF.createOrReplaceTempView("tmptable")
      val sql = "select * from tmptable where id >= 3"
      spark.sql(sql).show()
    } finally {
      // Stop the session even when loading or querying fails.
      spark.stop()
    }
  }
}
2. Spark写数据到MySQL
dataFrame.write.mode().jdbc()
1. 查询后写入
/** Copies the rows of MySQL table `test01` whose `id` is at least 3 into
  * table `test02`, appending to whatever `test02` already contains.
  *
  * @param args command-line arguments (unused)
  */
def main(args: Array[String]): Unit = {
  val spark = SparkSession.builder().appName("sparksql").master("local").getOrCreate()
  try {
    // JDBC credentials shared by the read and the write below.
    val prop = new Properties()
    prop.put("user", "root")
    prop.put("password", "863863")
    val url = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
    val dataFrame = spark.read.jdbc(url, "test01", prop).where("id >= 3")
    // SaveMode.Append adds the selected rows to test02.
    dataFrame.write.mode(SaveMode.Append).jdbc(url, "test02", prop)
  } finally {
    // Stop the session even when the read or the write throws.
    spark.stop()
  }
}
2. 通过构建DataFrame再写入
/** Builds a DataFrame from a comma-separated text file and writes it to
  * MySQL table `test02` using `SaveMode.Overwrite`.
  *
  * Every distinct comma-separated token of `d://data/word.txt` becomes one row
  * `(id, user)`, where `id` is the index assigned by `zipWithIndex`.
  *
  * @param args command-line arguments (unused)
  */
def main(args: Array[String]): Unit = {
  val spark = SparkSession.builder()
    .appName("test")
    .master("local")
    .getOrCreate()
  try {
    // JDBC credentials used by the write below.
    val prop = new Properties()
    prop.put("user", "root")
    prop.put("password", "863863")
    val url = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
    val rdd = spark.sparkContext.textFile("d://data/word.txt")
    // zipWithIndex yields (token, index); the Row puts the index first so it
    // lines up with the (id, user) column order of the schema.
    val rdd2 = rdd
      .flatMap(_.split(","))
      .distinct()
      .zipWithIndex()
      .map { case (word, index) => Row(index, word) }
    val schema = StructType(
      List(
        StructField("id", LongType, nullable = true),
        StructField("user", StringType, nullable = true)
      )
    )
    val dataFrame = spark.createDataFrame(rdd2, schema)
    dataFrame.write.mode(SaveMode.Overwrite).jdbc(url, "test02", prop)
  } finally {
    // Stop the session even when reading the file or writing to MySQL fails.
    spark.stop()
  }
}
3. 演示
1. spark.read.jdbc()
2. spark.read.format().option().load()
3. Spark查询数据写入MySQL