// Spark SQL walkthrough: create a SparkSession, read JSON/CSV sources,
// filter/transform the data, trigger actions, and release resources.
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}

// 1. Spark SQL operations require a SparkSession.
val sparkSession: SparkSession = SparkSession.builder()
  .appName("JdbcSource")
  .master("local[2]")
  .getOrCreate()

// 2. Read the JSON and CSV data sources.
val jread: DataFrame = sparkSession.read.json("e:/saveJson")
// FIX: the original declared a second `val jread` here — a duplicate
// definition and therefore a compile error; the CSV frame needs its own name.
val cread: DataFrame = sparkSession.read.csv("e:/saveCsv")
// Brings in the $"col" syntax used by the filters below.
import sparkSession.implicits._

// 3. Transform the data.
val fread: Dataset[Row] = jread.filter($"xueyuan" === "bigdata")
// toDF renames the columns of the dataset to "id" and "xueyuan".
val jfread: DataFrame = jread.toDF("id", "xueyuan")
val rs = jfread.filter($"id" < 3)
rs.show() // side-effecting 0-arity method: keep the parentheses

// 4. Trigger the action on the filtered dataset.
fread.show()

// 5. Release resources.
sparkSession.stop()