- 字符串数组
- 解析: 使用 from_json 将字符串转为数组,再使用 explode 切分数组 获取最终结果
[
{
"real_name":"饶**",
"user_photo":null,
"gender":"男",
"id_card_no":"5224********3614",
"start_date":"2016-10-26T00:00:00",
"expiry_date":"2036-10-26T00:00:00"
},
{
"real_name":"陈**",
"user_photo":null,
"gender":"男",
"id_card_no":"522427********712",
"start_date":"2016-01-27T00:00:00",
"expiry_date":"2026-01-27T00:00:00"
}
]
val jsonstr =
"""
|[{"real_name":"饶祥雨","user_photo":null,"gender":"男","birthday":"1989-10-13T00:00:00","id_card_no":"5224********3614","start_date":"2016-10-26T00:00:00","expiry_date":"2036-10-26T00:00:00"},{"real_name":"陈树林","user_photo":null,"gender":"男","birthday":"1995-06-19T00:00:00","id_card_no":"522427********712","start_date":"2016-01-27T00:00:00","expiry_date":"2026-01-27T00:00:00"}]
|""".stripMargin
val josnsql=
s"""
| select
| 1 as id,
| '${jsonstr }' as data
|""".stripMargin
val resultDS = spark.sql(josnsql)
import spark.implicits._
import org.apache.spark.sql.types._
val jsonDF = resultDS.select(
functions.from_json($"data",
ArrayType(
StructType(
StructField("real_name", StringType)
:: StructField("gender", StringType)
::StructField("id_card_no", StringType)
:: Nil)
)
).as("dataArray"),$"id"
).withColumn("items",functions.explode(functions.col("dataArray")))
.select($"id",$"items.*")
jsonDF.show(100)
jsonDF.printSchema()
-------------------结果----------------------------------------------------------------
+---+---------+------+------------------+
| id|real_name|gender| id_card_no|
+---+---------+------+------------------+
| 1| 饶**| 男|5224********3614|
| 1| 陈**| 男|522427********712|
+---+---------+------+------------------+
root
|-- id: integer (nullable = false)
|-- real_name: string (nullable = true)
|-- gender: string (nullable = true)
|-- id_card_no: string (nullable = true)
- 简单json
- 解析 直接使用get_json_object()方法即可
get_json_object(json_txt, path) - Extracts a json object from path.
{
"project_name":"巍山新天地项目",
"project_type":1,
"principal_name":"某某",
"principal_phone":"13000000000",
"province_code":"330000",
"city_code":"330700",
"county_code":"330783"
}
val jsonsql2=
"""
|{"project_name":"巍山新天地项目","project_type":1,"principal_name":"某某","principal_phone":"13000000000","province_code":"330000","city_code":"330700","county_code":"330783"}
|""".stripMargin
import spark.implicits._
val JSONDF = Seq ((0, s"""${jsonsql2}""")).toDF("id", "json")
val resultDS = JSONDF.select(
functions.get_json_object($"json","$.project_name").as("project_name"),
functions.get_json_object($"json","$.project_type").as("project_type"),
functions.get_json_object($"json","$.principal_phone").as("principal_phone")
)
resultDS.show()
resultDS.printSchema()
-----------------------------------------结果-------------------------------+------------+------------+---------------+
|project_name|project_type|principal_phone|
+------------+------------+---------------+
| 巍山新天地项目| 1| 13000000000|
+------------+------------+---------------+
root
|-- project_name: string (nullable = true)
|-- project_type: string (nullable = true)
|-- principal_phone: string (nullable = true)