目录
环境
Windows,VMware,Linux,hive-1.2.1
行列互转
题目 列转行
==>
create table ab(
a int comment '年份',
b varchar(3) comment '大写英文字母'
) comment '合并与拆分';
insert into ab
values
(2014,'A'),
(2014,'B'),
(2015,'C'),
(2015,'D');
解决 collect_list
select
a,
concat_ws('-',collect_list(b)) b -- 列转数组 输出格式
from ab
group by a;
题目 逆向 行转列
解决1 lateral view explode
with tmp as(-- 作为源表
select
a,
concat_ws('-',collect_list(b)) b -- 列转数组 输出格式
from ab
group by a
)
select a,b,val
from tmp -- 按行输出元素 关联范围笛卡尔积
lateral view explode(split(b,'-')) ex as val;
解决2 数组下标取元素
with tmp as(-- 作为源表
select
a,
concat_ws('-',collect_list(b)) b -- 列转数组 输出格式
from ab
group by a
)
select a,b,split(b,'-')[0] val from tmp union all
select a,b,split(b,'-')[1] val from tmp;
扩充数据
题目 扩充为连续整数
==>
create table extended_data(
a int comment '正整数'
) comment '扩充数据';
insert into extended_data
values
(2),
(4),
(7);
解决 笛卡尔积
with tmp as(-- 连续整数1-7
select row_number() over() rn
from (select explode(split(space(6),' ')))tmp
),
tmp1 as(-- 笛卡尔积
select a,rn
from extended_data,tmp
where a >= rn
sort by a,rn desc -- 若直接分组 列转数组无序
)
select -- 输出格式
a,
concat_ws(',',collect_list(cast(rn as string))) b_desc, -- 降序
concat_ws(',',sort_array(collect_list(cast(rn as string)))) b_asc -- 升序
from tmp1
group by a;
题目 扩充为自身
=>
table extended_data1(
a increate t comment '正整数'
) comment '扩充数据';
insert into extended_data1
values
(2),
(3),
(3),
(4);
解决 标记过滤
-- 1.连续整数不行=>结果是自身列
with tmp as(-- 2.标记每一行 为笛卡尔积作为过滤条件
select a,row_number() over() rn from extended_data1
),
tmp1 as(-- 笛卡尔积
select m.rn,m.a,n.a b,n.rn rn1
from tmp m,tmp n
where m.rn >= n.rn
sort by m.a,rn,b desc -- 直接分组 列转数组无序
)
select -- 输出格式
a,
concat_ws('-',collect_list(cast(b as string))) b_desc, -- 降序
concat_ws('-',sort_array(collect_list(cast(b as string)))) b_asc -- 升序
from tmp1 -- 相同值分组
group by rn,a;
展开数据
题目 展开字符串
展开字符串"1-5,16,11-13,9",结果:"1,2,3,4,5,16,11,12,13,9"顺序不变
思路
炸裂函数,连续整数1-16,笛卡尔积:交叉连接;lateral view 炸裂函数(切入点)
过滤条件(难点)
解决1 交叉连接
with tmp as(-- 连续整数1-16
select row_number() over() rn
from (select explode(split(space(15),' ')))tmp
),
tmp1 as(select posexplode(split('1-5,16,11-13,9',','))), --按行输出元素 位置
tmp2 as(-- 笛卡尔积
select pos,val,rn
from tmp1,tmp
where
rn between cast(split(val,'-')[0] as string) and cast(split(val,'-')[1] as string)
or
rn = val -- 16 => [0,1] => [16,null] => 过滤不出数据
sort by pos,rn -- 为了结果有序
)
select -- 输出格式
'1-5,16,11-13,9' str_original,
concat_ws(',',collect_list(cast(rn as string))) str_extended
from tmp2;
解决2 lateral view 炸裂函数
with tmp as(-- 连续整数1-16
select row_number() over() rn
from (select explode(split(space(15),' ')))tmp
),
tmp1 as(-- 笛卡尔积
select pos,val,rn
from tmp
lateral view posexplode(split('1-5,16,11-13,9',',')) pes as pos,val
where
rn between cast(split(val,'-')[0] as string) and cast(split(val,'-')[1] as string)
or
rn = val -- 16 => [0,1] => [16,null] => 过滤不出数据
sort by pos,rn -- 为了结果有序
)
select -- 输出格式
'1-5,16,11-13,9' str_original,
concat_ws(',',collect_list(cast(rn as string))) str_extended
from tmp1;