1.列转行案例1
-- 初始化数据
create table if not exists in_2_out.userlog_page_chain_test(
page_chain string comment '一次操作中的页面点击链',
staytime String comment '页面停留时长(ms)'
)
row format delimited fields terminated by ',' lines terminated by '\n';
insert into table in_2_out.userlog_page_chain_test values('1-2-3','0-200-100');
insert into table in_2_out.userlog_page_chain_test values('2-3-4','0-500-200');
insert into table in_2_out.userlog_page_chain_test values('1-2-3','0-300-500');
insert into table in_2_out.userlog_page_chain_test values('2-3-4','0-100-500');
insert into table in_2_out.userlog_page_chain_test values('2-3-4','0-600-200');
select * from in_2_out.userlog_page_chain_test
page_chain staytime
1-2-3 0-200-100
2-3-4 0-100-500
2-3-4 0-500-200
2-3-4 0-600-200
1-2-3 0-300-500
-- 将相同page_chain的staytime进行汇总
select page_chain,concat_ws('-',sum(cast(split[size(split)-3] as int)),sum(cast(split[size(split)-2] as int)),sum(cast(split[size(split)-1] as int))) from
(
select page_chain,split(staytime,"-") split from in_2_out.userlog_page_chain_test
) group by page_chain;
2-3-4 0-1200-900
1-2-3 0-500-600
2.列转行案例2
SELECT user_id
,user_phone
,concat_ws(
','
,collect_set(
IF(LENGTH(split(address,'省')[0])>3 ,IF(split(address,'-')[1] IS NULL,NULL,split(address,'-')[0]),split(address,'省')[0])
)
) AS province
,concat_ws(
','
,collect_set(
IF(address LIKE '%市%',substr(split(address,'市')[0],-2),null)
)
) AS city
FROM xxxxxx
WHERE ds = '2020701'
AND to_char(gmt_last,'yyyymmdd') >= to_char(dateadd(to_date('20200701','yyyymmdd'), - 365,'dd'),'yyyymmdd')
GROUP BY user_id
,user_phone
;
collect_set可以将相同数据去重,还能够自动过滤掉NULL的值
参考资料
Hive列转行、行转列
https://blog.csdn.net/ifenggege/article/details/107852199