// 大数据系列篇-SPARK-SQL读写数据源 (Big Data series: reading and writing Spark SQL data sources)
package com.test
import org.apache.spark.SparkConf
import org.apache.spark.sql.{SaveMode, SparkSession}
//练习read-write数据源
/**
 * Tutorial program exercising Spark SQL data-source reads and writes:
 * JSON (via DataFrameReader and direct file SQL), Parquet (default format),
 * plus a commented-out JDBC round-trip template.
 *
 * Expects `data/user.json` to exist; writes results under `data/output/`.
 */
object SparkSqlRw {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("练习SparkSqlRw").setMaster("local[*]")
    val spark = SparkSession.builder().config(sparkConf).getOrCreate()
    // Ensure the session is released even if an action below throws
    // (e.g. a missing input path raises AnalysisException).
    try {
      // Read JSON, option 1: through the DataFrameReader API.
      val df = spark.read.json("data/user.json")
      // Display via a temporary view.
      df.createOrReplaceTempView("user")
      spark.sql("SELECT * FROM user").show()
      // Read JSON, option 2: query the file directly in SQL.
      spark.sql("SELECT * FROM json.`data/user.json`").show()
      // Write as JSON.
      df.write.format("json").mode(SaveMode.Overwrite).save("data/output/json")
      // Write with the default data-source format (Parquet).
      df.write.mode(SaveMode.Overwrite).save("data/output/parquet")
      // Read the Parquet data back from the path written above.
      // (Fix: the original read `data/user.parquet`, a path this program
      // never creates, so the read-back would fail at runtime.)
      val df2 = spark.read.load("data/output/parquet")
      val df3 = spark.read.parquet("data/output/parquet")
      // Display directly.
      df2.show()
      df3.show()
      // Display via temporary views, with column aliases.
      df2.createOrReplaceTempView("user2")
      spark.sql("SELECT age age2,userName userName2 FROM user2").show()
      df3.createOrReplaceTempView("user3")
      spark.sql("SELECT age as age3,userName as userName3 FROM user3").show()
      // JDBC template (fill in connection options to use):
      // val df1 = spark.read.format("jdbc")
      //   .option("url", "")
      //   .option("driver", "")
      //   .option("user", "")
      //   .option("password", "")
      //   .option("dbtable", "")
      //   .load()
      // df1.write.format("jdbc")
      //   .option("url", "")
      //   .option("driver", "")
      //   .option("user", "")
      //   .option("password", "")
      //   .option("dbtable", "")
      //   .mode(SaveMode.Append)
      //   .save()
    } finally {
      spark.close()
    }
  }
}