[Spark] json解析
2020/10/17
-
sql 直接解析
利用
get_json_object(json, '$.field')
// Approach 1: extract JSON fields directly in SQL with get_json_object.
val sql =
  """
    |select
    |  get_json_object(json_data, '$.name') as name,
    |  get_json_object(json_data, '$.sex') as sex,
    |  get_json_object(json_data, '$.age') as age,
    |  get_json_object(json_data, '$.height') as height,
    |  get_json_object(json_data, '$.weight') as weight,
    |  key
    |from database.table
    |where day = '2020-10-15'
    |""".stripMargin
println(s"sql: ${sql}")

// Every field extracted by get_json_object comes back as a String column,
// so numeric fields must be read with getString first and then converted
// with .toInt (calling getInt/getAs[Int] directly fails — see note below).
val res = hiveContext.sql(sql).map(row => {
  val name   = row.getString(0)
  val sex    = row.getString(1)
  val age    = row.getString(2).toInt
  val height = row.getString(3).toInt
  val weight = row.getString(4).toInt
  val key    = row.getString(5).toInt
  (key, name, sex, age, height, weight)
}).persist(StorageLevel.MEMORY_ONLY)
注意: 从 json 中解析出来的字段,要先用 x.getString(0) 取出字符串,之后再用 .toInt 等方法转换为对应的数据类型。不可以直接用 .getAs[Int](0) 或 getInt(0),否则会抛出类型转换异常(String 不能直接被转换为数值类型)。 -
借助spark + alibaba.fastjson解析
package com.simida

import com.alibaba.fastjson.JSON // fastjson: executor-side JSON parsing
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

/**
 * Approach 2: select the raw JSON column with SQL, then parse each row's
 * JSON string on the executors with Alibaba fastjson.
 *
 * Produces one tuple per row: (key, json_data, name, sex, age, height, weight),
 * where the extracted fields are Strings (no numeric conversion is done here).
 */
object ParseJsonData {

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName(this.getClass.getSimpleName)
    val sc = new SparkContext(sparkConf)
    val hiveContext = new HiveContext(sc)

    // Only the raw JSON column and the key are selected; field extraction
    // happens per row in the map below.
    val sql =
      s"""
         |select json_data,
         |       key
         |from database.table
         |where day = '2020-10-15'
         |""".stripMargin
    println(s"sql: ${sql}")

    val res = hiveContext.sql(sql).map(x => {
      val json_data = x.getString(0)
      val key = x.getString(1)
      // Parse the JSON string ONCE per row and reuse the parsed object.
      // The original re-ran JSON.parseObject(json_data) for each of the
      // five fields, parsing the same string five times per row.
      val obj = JSON.parseObject(json_data)
      val name = obj.getString("name")
      val sex = obj.getString("sex")
      val age = obj.getString("age")
      val height = obj.getString("height")
      val weight = obj.getString("weight")
      (key, json_data, name, sex, age, height, weight)
    })

    // Release cluster resources; the original never stopped the SparkContext.
    sc.stop()
  }
}
以上两种方式解析出的字段内容一致。但注意:方式一已将 age/height/weight/key 转为 Int,而方式二全部保留为 String(且元组中还多带了 json_data 原文),如需结果完全等价,方式二还需自行做相应的类型转换。