样例数据:
[{"website":"www.baidu.com","name":"百度"},{"website":"www.taobao.com","name":"淘宝"},{"website":"www.jd.com","name":"京东"}]
解决思路
我们可以通过以下办法解析json数组 :
先通过正则表达式将数组中的 , 用 ; 代替:
select regexp_replace('[{"website":"www.baidu.com","name":"百度"},{"website":"www.taobao.com","name":"淘宝"}]', '\\}\\,\\{','\\}\\;\\{');
结果:
[{"website":"www.baidu.com","name":"百度"};{"website":"www.taobao.com","name":"淘宝"}]
再通过正则表达式把数组中的 [ ] 去掉:
select regexp_replace(
regexp_replace(
'[{"website":"www.baidu.com","name":"百度"},{"website":"www.taobao.com","name":"淘宝"}]', '\\}\\,\\{','\\}\\;\\{'
),'\\[|\\]',''
);
结果:
{"website":"www.baidu.com","name":"百度"};{"website":"www.taobao.com","name":"淘宝"}
接下来用split函数按 ; 进行分割:
select
split(
regexp_replace(
regexp_replace(
'[{"website":"www.baidu.com","name":"百度"},{"website":"www.taobao.com","name":"淘宝"}]', '\\}\\,\\{','\\}\\;\\{'
),'\\[|\\]',''
),'\\;'
);
结果:
["{"website":"www.baidu.com","name":"百度"}","{"website":"www.taobao.com","name":"淘宝"}"]
再利用explode函数将数组转换按列输出:
select explode(
split(
regexp_replace(
regexp_replace(
'[{"website":"www.baidu.com","name":"百度"},{"website":"www.taobao.com","name":"淘宝"}]', '\\}\\,\\{','\\}\\;\\{'
),'\\[|\\]',''
),'\\;'
)
);
结果:
{"website":"www.baidu.com","name":"百度"}
{"website":"www.taobao.com","name":"淘宝"}
最后用get_json_object函数将explode转换的json格式的列进行解析。
select
get_json_object(busi_json, '$.website') as website,
get_json_object(busi_json, '$.name') as name
from data_table
LATERAL VIEW explode(split(regexp_replace(regexp_replace(busi_info, '\\}\\,\\{','\\}\\;\\{'),'\\[|\\]',''),'\\;'))busi as busi_json
where dt = '20200906'
group by get_json_object(busi_json, '$.website'),get_json_object(busi_json, '$.name')