准备一个json文件
{"id":1,"name":"Michael"}
{"id":2,"name":"Andy","age":30}
{"id":3,"name":"Justin","age":19}
方法一:引用JSON方法
object ReadJsonDemo {
  /** Reads a JSON-Lines file (one JSON object per line) as plain text with the
    * RDD API and parses each line individually.
    *
    * NOTE(review): scala.util.parsing.json is deprecated since Scala 2.11;
    * prefer a dedicated JSON library, or Spark SQL's JSON reader, in new code.
    */
  def main(args: Array[String]): Unit = {
    // Local-mode configuration; app name corrected from the copy-pasted "readcsv".
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("readjson")
    val sc: SparkContext = SparkContext.getOrCreate(conf)
    // Each RDD element is one raw line of the file.
    val lines: RDD[String] = sc.textFile("in/users.json")
    import scala.util.parsing.json.JSON
    // parseFull yields Some(parsed value) on success and None on malformed input.
    val rdd: RDD[Option[Any]] = lines.map(line => JSON.parseFull(line))
    rdd.collect().foreach(println)
  } // fixed: the object was closed with ']' instead of '}', which would not compile
}
方法二:SparkSession
object ReadJsonDemo {
  /** Reads the same JSON file through SparkSession / the DataFrame API and
    * demonstrates column selection and filtering.
    */
  def main(args: Array[String]): Unit = {
    // Local-mode configuration; app name corrected from the copy-pasted "readcsv".
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("readjson")
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
    // Removed `.option("head", false)`: "head" is not a recognized reader option,
    // and the real "header" option applies only to CSV, not JSON — it was a no-op.
    val userDF: DataFrame = spark.read.format("json").load("in/users.json")
    // Records missing the field yield null; with Spark's default case-insensitive
    // resolution, "age" also matches a capitalized "Age" key in the data.
    userDF.select("id", "age").show()
    // Column objects allow typed expressions such as the filter below.
    val idColumn: Column = userDF("id")
    val nameColumn: Column = userDF("name")
    val ageColumn: Column = userDF("age")
    userDF.select(idColumn, nameColumn, ageColumn).show()
    userDF.filter(ageColumn > 20).show()
  }
}