Json数据
第一次写博客,mark一下 ——20191025
下面是一条json数据,导入hive中类型为string
{
"user_id": "u0001",
"view_params": "order_condition=03&order_type=1&key=华为手机",
"exts": {"target_type": "04","target_category": "100","target_ids": "[1,2,3]"},
"ct": "1567429965000"
}
HIVE SQL解析
用了三种方式,得出数据如下,前两种方法看起来更清爽,并且效率高:
第一种方式
select
get_json_object(b,'$.user_id') as user_id,
regexp_extract(get_json_object(b,'$.view_params'),'.*=(.*)\\&.*=(.*)\\&.*=(.*)',1) as order_condition,
regexp_extract(get_json_object(b,'$.view_params'),'.*=(.*)\\&.*=(.*)\\&.*=(.*)',2) as order_type,
regexp_extract(get_json_object(b,'$.view_params'),'.*=(.*)\\&.*=(.*)\\&.*=(.*)',3) as key,
get_json_object(get_json_object(b,'$.exts'),'$.target_type') as target_type,
get_json_object(get_json_object(b,'$.exts'),'$.target_category') as target_category,
get_json_object(get_json_object(b,'$.exts'),'$.target_ids') as target_ids,
from_unixtime(cast((get_json_object(b,'$.ct')/1000)as bigint),'yyyyMMddHH') as ct
from log_json;
第二种方式
select
get_json_object(b,'$.user_id') as user_id,
str_to_map(get_json_object(b,'$.view_params'),"&","=")['order_condition'] as order_condition,
str_to_map(get_json_object(b,'$.view_params'),"&","=")['order_type'] as order_type,
str_to_map(get_json_object(b,'$.view_params'),"&","=")['key'] as key,
get_json_object(get_json_object(b,'$.exts'),'$.target_type') as target_type,
get_json_object(get_json_object(b,'$.exts'),'$.target_category') as target_category,
get_json_object(get_json_object(b,'$.exts'),'$.target_ids') as target_ids,
from_unixtime(cast((get_json_object(b,'$.ct')/1000)as bigint),'yyyyMMddHH') as ct
from log_json;
第三种方式
select
t.user_id,
split(split(t.view_params,'&')[0],'=')[1] as order_condition,
split(split(t.view_params,'&')[1],'=')[1] as order_type,
split(split(t.view_params,'&')[2],'=')[1] as key,
tmp1.target_type,
tmp1.target_category,
tmp1.target_ids,
from_unixtime(cast(t.ct/1000 as bigint),'yyyyMMddHH') as ct
from
(select
tmp.user_id,
tmp.view_params,
tmp.exts,
tmp.ct
from log_json
lateral view json_tuple(b,'user_id','view_params','exts','ct') tmp as
user_id,view_params,exts,ct) t
lateral view json_tuple(exts,'target_type','target_category','target_ids') tmp1 as
target_type,target_category,target_ids;
string类型的数组形式的行转列拆分
大家可以看出来target_ids字段为string类型的数组,如果想拆开来,
可以用以下lateral view+explode+split+regext_replace方式,当然union all拆分也可以,一般也不会这么用,如果有更好的方式请留言,互相学习,谢谢~
下面是我的方式:
select
get_json_object(b,'$.user_id') as user_id,
regexp_extract(get_json_object(b,'$.view_params'),'.*=(.*)\\&.*=(.*)\\&.*=(.*)',1) as order_condition,
regexp_extract(get_json_object(b,'$.view_params'),'.*=(.*)\\&.*=(.*)\\&.*=(.*)',2) as order_type,
regexp_extract(get_json_object(b,'$.view_params'),'.*=(.*)\\&.*=(.*)\\&.*=(.*)',3) as key,
get_json_object(get_json_object(b,'$.exts'),'$.target_type') as target_type,
get_json_object(get_json_object(b,'$.exts'),'$.target_category') as target_category,
from_unixtime(cast((get_json_object(b,'$.ct')/1000)as bigint),'yyyyMMddHH') as ct,
tmp.target_ids
from log_json
lateral view explode(split(regexp_replace(get_json_object(get_json_object(b,'$.exts'),'$.target_ids'),'[\\[\\]]',''),',')) tmp as target_ids;
数据展现: