Perform JSON parsing on the following file
[
{
"id": "392456197008193000",
"name": "张三",
"age": 20,
"gender": 0,
"province": "北京市",
"city": "昌平区",
"region": "回龙观",
"phone": "18589407692",
"birthday": "1970-08-19",
"hobby": "美食;篮球;足球",
"register_date": "2018-08-06 09:44:43"
},
{
"id": "267456198006210000",
"name": "李四",
"age": 25,
"gender": 1,
"province": "河南省",
"city": "郑州市",
"region": "郑东新区",
"phone": "18681109672",
"birthday": "1980-06-21",
"hobby": "音乐;阅读;旅游",
"register_date": "2017-04-07 09:14:13"
},
{
"id": "892456199007203000",
"name": "王五",
"age": 24,
"gender": 1,
"province": "湖北省",
"city": "武汉市",
"region": "汉阳区",
"phone": "18798009102",
"birthday": "1990-07-20",
"hobby": "写代码;读代码;算法",
"register_date": "2016-06-08 07:34:23"
},
{
"id": "492456198712198000",
"name": "赵六",
"age": 26,
"gender": 2,
"province": "陕西省",
"city": "西安市",
"region": "莲湖区",
"phone": "18189189195",
"birthday": "1987-12-19",
"hobby": "购物;旅游",
"register_date": "2016-01-09 19:15:53"
},
{
"id": "392456197008193000",
"name": "张三",
"age": 20,
"gender": 0,
"province": "北京市",
"city": "昌平区",
"region": "回龙观",
"phone": "18589407692",
"birthday": "1970-08-19",
"hobby": "美食;篮球;足球",
"register_date": "2018-08-06 09:44:43"
},
{
"id": "392456197008193000",
"name": "张三",
"age": 20,
"gender": 0,
"province": "北京市",
"city": "昌平区",
"region": "回龙观",
"phone": "18589407692",
"birthday": "1970-08-19",
"hobby": "美食;篮球;足球",
"register_date": "2018-08-06 09:44:43"
}
]
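The code below uses Alibaba's fastjson library to parse the array. If the project is built with sbt, the dependency looks roughly like this (1.2.83 is just one published version; other recent fastjson releases work the same way):

libraryDependencies += "com.alibaba" % "fastjson" % "1.2.83"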
Since sc.textFile reads the file line by line while the JSON array spans multiple lines, the code first concatenates all the lines into one string and then parses it. The code is as follows:
import com.alibaba.fastjson.JSON
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.JavaConverters._

object Test1 {
  def main(args: Array[String]): Unit = {
    // Initialize the Spark configuration
    val conf = new SparkConf().setAppName("test1").setMaster("local")
    val sc = new SparkContext(conf)
    // Read the JSON file as plain text lines
    val lineRDD = sc.textFile("src/main/resources/data/user.json")
    /**
     * Concatenate all lines into a single string with reduce,
     * parse the JSON array with Alibaba's fastjson utility class,
     * and store each parsed element in the Stu case class.
     */
    val jsonList = JSON.parseArray(lineRDD.reduce(_ + _), classOf[Stu])
    // Convert the java.util.List returned by fastjson into a Scala List
    val stuList = jsonList.asScala.toList
    // Create the SparkSession
    val spark = SparkSession.builder().config(conf).getOrCreate()
    // Turn the parsed objects back into an RDD
    val stuRDD = sc.makeRDD(stuList)
    // Map each record to a tuple, keeping only id, name, and age
    val value = stuRDD.map(i => {
      (i.id, i.name, i.age)
    })
    // Convert the RDD to a DataFrame and name the columns
    val frame = spark.createDataFrame(value).toDF("id", "name", "age")
    // Display the result
    frame.show()
    // Release resources
    sc.stop()
  }
}

// Student case class; all fields are declared as String, so the
// numeric age and gender values in the JSON are stored as strings
case class Stu(
  id: String,
  name: String,
  age: String,
  gender: String,
  province: String,
  city: String,
  region: String,
  phone: String,
  birthday: String,
  hobby: String,
  register_date: String
)
The result is shown below (the output of frame.show(); exact column padding may vary, since show() treats CJK characters as single-width):
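+------------------+----+---+
|                id|name|age|
+------------------+----+---+
|392456197008193000|  张三| 20|
|267456198006210000|  李四| 25|
|892456199007203000|  王五| 24|
|492456198712198000|  赵六| 26|
|392456197008193000|  张三| 20|
|392456197008193000|  张三| 20|
+------------------+----+---+

As a side note, newer Spark versions can load a multi-line JSON array directly, with no fastjson dependency and no case class. Below is a minimal sketch, assuming Spark 2.2 or later (which added the multiLine option); Test1Alt is a hypothetical name:

import org.apache.spark.sql.SparkSession

object Test1Alt {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("test1-alt")
      .master("local[*]")
      .getOrCreate()
    // multiLine lets Spark parse a JSON array that spans several lines
    val df = spark.read
      .option("multiLine", "true")
      .json("src/main/resources/data/user.json")
    // Select the same three columns as the fastjson version
    df.select("id", "name", "age").show()
    spark.stop()
  }
}

This trades the manual reduce-and-parse step for Spark's built-in JSON data source, which also infers the schema from the data.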