hive 行列转化

用SQL判断字段串中连续出现两个以上的数字,并返回对应的数字
with a as (
select explode(split(regexp_replace(‘sdfsd1232sdf324’,‘[0-9]+’,‘,’),‘,’)) col
) select * from a where LENGTH(col)>1
结果为sdfsd
sdf
select explode(split(regexp_replace(‘sdfsd1232sdf324’,‘[0-9]+’,‘,’),‘,’))
union all
select explode(split(regexp_replace(‘sdfsd1232sdf324’,‘[a-z]+’,‘,’),‘,’))

select regexp_extract(‘honeymoon’, ‘hon(.*?)(moon)’,2);

select explode(array(‘A’,‘B’,‘C’))
select explode(map(‘a’,1,‘b’,2))

select posexplode(array(‘A’,‘B’,‘C’));

with t2 as ( select ‘1<’ as client_rights,‘a’ as cust_num
union all
select ‘CWe:’ as client_rights,‘a’ as cust_num
)
select
collect_set(t2.client_rights),
– concat_ws(‘’, collect_set(t2.client_rights),
instr(concat_ws(‘’, collect_set(t2.client_rights)), ‘<’),
if(instr(concat_ws(‘’, collect_set(t2.client_rights)), ‘<’) > 0,1,0)
from t2
group by cust_num

create table myhive.pageAds(
pageid string ,
adid_list array
)

create table dual(id string);

select 1+1 from dual;
insert into myhive.pageAds select ‘front_page’ as pageid,array(1,2,3) as adid_list from dual
insert into myhive.pageAds select ‘contact_page’ as pageid,array(3,4,5) as adid_list from dual
在这里插入图片描述

hdfs dfs -cat /user/hive/warehouse/myhive.db/pageads/000* | hdfs dfs -copyFromLocal - /user/hive/warehouse/myhive.db/pageads/pages; 小文件合并

在这里插入图片描述

select * from myhive.pageAds;
在这里插入图片描述

select pageid,adid from myhive.pageAds lateral view explode(adid_list) a as adid;
在这里插入图片描述
select pageid,adid_list,adid_list[0],adid_list[1],adid_list[2] from myhive.pageAds;
在这里插入图片描述

create table myhive.t_afan_test
(
info1 array,
info2 array
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ‘\t’
COLLECTION ITEMS TERMINATED BY ‘,’;

insert into myhive.t_afan_test select array(12,23,23,34) as info1,array(“what”,“are”,“this”) as info2 from dual;
insert into myhive.t_afan_test select array(56,78) as info1,array(“this”,“is”,“book”) as info2 from dual;
insert into myhive.t_afan_test select array(56,78) as info1,array(“this”,“is”,“book”) as info2 from dual;

select * from myhive.t_afan_test

insert overwrite table myhive.t_afan_test
select a.info1 info1,a.info2 info2 from ( select info1,info2,ROW_NUMBER() over(partition by info1 order by info2) as num from myhive.t_afan_test ) a where a.num=1; 去重

select * from myhive.t_afan_test

select a,b from myhive.t_afan_test lateral view explode(info1) ad as a lateral view explode(info2) ad as b;

select posexplode(info2) from myhive.t_afan_test;

select a.,b. from
(select ina,inb from myhive.t_afan_test
lateral view explode(info1) tmpTable1 as ina
lateral view explode(info2) tmpTable2 as inb) a
left join (select id,inb from myhive.t_afan_test lateral view posexplode(info2) ad as id,inb) b
on a.inb=b.inb

select h.inb,concat_ws(‘,’,collect_list(cast(h.ina as string ))) from (
select a.ina as ina,b.inb as inb,b.id as id from
(select ina,inb from myhive.t_afan_test
lateral view explode(info1) tmpTable1 as ina
lateral view explode(info2) tmpTable2 as inb) a
left join (
select inb,id from myhive.t_afan_test lateral view posexplode(info2) ac as id,inb
) b
on a.inb=b.inb
) h group by h.inb

select info1,info2,ina,inb,id1,id2 from myhive.t_afan_test
lateral view posexplode(info1) tmpTable1 as id1,ina
lateral view posexplode(info2) tmpTable2 as id2,inb
where id1=id2

select h.inb,concat_ws(‘,’,collect_list(cast(h.ina as string ))) from (
select a.ina as ina,b.inb as inb from
(select ina,inb from myhive.t_afan_test
lateral view explode(info1) tmpTable1 as ina
lateral view explode(info2) tmpTable2 as inb) a
left join (
select distinct inb from myhive.t_afan_test lateral view explode(info2) c as inb
) b
on a.inb=b.inb
) h group by h.inb

select c.deep,concat_ws(‘,’,collect_list(cast(c.info as string))) from (
select info,b.hobby hobby,b.deep deep from myhive.t_afan_test a lateral view posexplode(info2) b as hobby,deep
lateral view explode(a.info1) c as info) c
group by c.deep

–行列转换

create table myhive.row2col1(
col1 string,
col2 string,
col3 int
)row format delimited fields terminated by ‘,’;

insert into myhive.row2col1 values(‘a’,‘c’,1),
(‘a’,‘d’,2),
(‘a’,‘e’,3),
(‘b’,‘c’,4),
(‘b’,‘d’,5),
(‘b’,‘e’,6);

select * from myhive.row2col1;

select
col1 as col1,
max(case col2 when ‘c’ then col3 else 0 end) as c,
max(case col2 when ‘d’ then col3 else 0 end) as d,
max(case col2 when ‘e’ then col3 else 0 end) as e
from
myhive.row2col1 group by col1;
在这里插入图片描述

–多行转单列
create table myhive.row2col2(
col1 string,
col2 string,
col3 int
)row format delimited fields terminated by ‘,’;

insert into myhive.row2col2 values
(‘a’,‘b’,1),
(‘a’,‘b’,2),
(‘a’,‘b’,3),
(‘c’,‘d’,4),
(‘c’,‘d’,5),
(‘a’,‘b’,1),
(‘a’,‘b’,2),
(‘a’,‘b’,3),
(‘c’,‘d’,4),
(‘c’,‘d’,5),
(‘c’,‘d’,6);
select * from myhive.row2col2;
在这里插入图片描述

select col1,col2,concat_ws(‘,’,collect_set(cast(col3 as string))) from myhive.row2col2 group by col1,col2;–去重
在这里插入图片描述
select col1,col2,collect_set(col3),collect_set(col3)[0],collect_set(col3)[1],collect_set(col3)[2] from myhive.row2col2 group by col1,col2;
在这里插入图片描述
select col1,col2,collect_set(col3),collect_set(col3)[0],collect_set(col3)[1],collect_set(col3)[2],collect_set(col3)[3] from myhive.row2col2 group by col1,col2;
在这里插入图片描述

select col1,col2,concat_ws(‘,’,collect_list(cast(col3 as string))) from myhive.row2col2 group by col1,col2;–不去重

–多列转多行
create table myhive.col2row1(
col1 string,
col2 int,
col3 int,
col4 int
)row format delimited fields terminated by ‘,’;

insert into myhive.col2row1 values
(‘a’,1,2,3),
(‘b’,4,5,6);

select * from myhive.col2row1;
在这里插入图片描述

select col1, ‘c’ as col2, col2 as col3 from myhive.col2row1
UNION ALL
select col1, ‘d’ as col2, col3 as col3 from myhive.col2row1
UNION ALL
select col1, ‘e’ as col2, col4 as col3 from myhive.col2row1;
在这里插入图片描述

–单列转多行
create table col2row2(
col1 string,
col2 string,
col3 string
)row format delimited fields terminated by ‘,’; --建表用’,‘隔开,插入用’,'会少数据

create table myhive.col2row2(
col1 string,
col2 string,
col3 string
);

insert into myhive.col2row2 values(‘a’,‘b’,“1,2,3”),
(‘c’,‘d’,“4,5,6”);

select * from myhive.col2row2;
在这里插入图片描述

select col1,col2,col4 from myhive.col2row2 lateral view explode(split(col3,‘,’)) ad as col4

在这里插入图片描述

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值