行转列
准备数据和表
-- 准备数据
1,zss1,语文,76
1,zss1,英语,56
1,zss1,数学,98
2,zss2,语文,88
2,zss2,英语,97
2,zss2,数学,67
3,zss3,语文,96
3,zss3,英语,87
3,zss3,数学,79
4,zss4,语文,84
4,zss4,英语,79
4,zss4,数学,84
-- 创建表
drop table tb_a;
create table tb_a(
id int,
name string,
course string,
score double
)
row format delimited fields terminated by ',';
-- 导入数据
load data local inpath'/data/sc.csv' into table tb_a;
COLLECT_SET(col):函数只接受基本数据类型,它的主要作用是将某字段的值进行去重汇总,产生array类型字段。 将某列数据转换成数组
-- 看看表中的数据
select * from tb_a;
+----------+------------+--------------+-------------+
| tb_a.id | tb_a.name | tb_a.course | tb_a.score |
+----------+------------+--------------+-------------+
| 1 | zss1 | 语文 | 76.0 |
| 1 | zss1 | 英语 | 56.0 |
| 1 | zss1 | 数学 | 98.0 |
| 2 | zss2 | 语文 | 88.0 |
| 2 | zss2 | 英语 | 97.0 |
| 2 | zss2 | 数学 | 67.0 |
| 3 | zss3 | 语文 | 96.0 |
| 3 | zss3 | 英语 | 87.0 |
| 3 | zss3 | 数学 | 79.0 |
| 4 | zss4 | 语文 | 84.0 |
| 4 | zss4 | 英语 | 79.0 |
| 4 | zss4 | 数学 | 84.0 |
+----------+------------+--------------+-------------+
-- collect_set(col) 聚合函数 将组内的数据收集到数组中 会去重
select
id,
collect_set(course)
from tb_a
group by id;
+-----+-------------------+
| id | _c1 |
+-----+-------------------+
| 1 | ["语文","英语","数学"] |
| 2 | ["语文","英语","数学"] |
| 3 | ["语文","英语","数学"] |
| 4 | ["语文","英语","数学"] |
+-----+-------------------+
--collect_list(col) 聚合函数 将组内的数据收集到数组中 不会去重
select
id,
collect_list(course)
from tb_a
group by id;
+-----+-------------------+
| id | _c1 |
+-----+-------------------+
| 1 | ["语文","英语","数学"] |
| 2 | ["语文","英语","数学"] |
| 3 | ["语文","英语","数学"] |
| 4 | ["语文","英语","数学"] |
+-----+-------------------+
列转行
准备数据和表
-- 准备数据
1,zss1,ks1_ks2,aa1_aa2_aa3
2,zss2,dq1_dq2,bb1_bb2_bb3
3,zss3,ly1_ly2,cc1_cc2_cc3
4,zss4,pb1_pb2,dd1_dd2_dd3
5,zss5,yx1_yx2,ee1_ee2_ee3
6,zss6,lh1_lh2,ff1_ff2_ff3
-- 创建表
drop table tb_a;
create table tb_a(
id int,
name string,
hobby array<string>, -- 爱好
friends array<string> -- 朋友
)
row format delimited fields terminated by ','
collection items terminated by '_';
-- 导入数据
load data local inpath '/data/friend.csv' into table tb_a;
由于explode的局限性,通常会与Lateral View结合使用,配合Explode(或者其他的UDTF),一个语句生成把单行数据拆解成多行后的数据结果集;Lateral view语法首先会将UDTF处理生成的结果放到一张虚拟表中,然后再将这个虚拟表和输入行进行关联实现添加列到select中。
-- 先看看表的数据
select * from tb_a;
+----------+------------+----------------+----------------------+
| tb_a.id | tb_a.name | tb_a.hobby | tb_a.friends |
+----------+------------+----------------+----------------------+
| 1 | zss1 | ["ks1","ks2"] | ["aa1","aa2","aa3"] |
| 2 | zss2 | ["dq1","dq2"] | ["bb1","bb2","bb3"] |
| 3 | zss3 | ["ly1","ly2"] | ["cc1","cc2","cc3"] |
| 4 | zss4 | ["pb1","pb2"] | ["dd1","dd2","dd3"] |
| 5 | zss5 | ["yx1","yx2"] | ["ee1","ee2","ee3"] |
| 6 | zss6 | ["lh1","lh2"] | ["ff1","ff2","ff3"] |
+----------+------------+----------------+----------------------+
-- 一个lateral view和explode查询的结果
-- 一个lateral view和explode会将hobby这一行拆成多行,生成一张虚拟表,然后这个虚拟表和tb_a的那一行进行关联
select
id,a1,b1
from tb_a
lateral view explode(hobby) a as a1
;
+-----+------+
| id | a1 |
+-----+------+
| 1 | ks1 |
| 1 | ks2 |
| 2 | dq1 |
| 2 | dq2 |
| 3 | ly1 |
| 3 | ly2 |
| 4 | pb1 |
| 4 | pb2 |
| 5 | yx1 |
| 5 | yx2 |
| 6 | lh1 |
| 6 | lh2 |
+-----+------+
-- 两个个lateral view和explode查询的结果
-- 两个lateral view和explode会分别将hobby和friend拆成多行,生成两张虚拟表a、b,然后虚拟表a现和表tb_a进行行关联,之后再和虚 -- 拟表b进行关联
select
id,a1,b1
from tb_a
lateral view explode(hobby) a as a1
lateral view explode(friends) b as b1
;
+-----+------+------+
| id | a1 | b1 |
+-----+------+------+
| 1 | ks1 | aa1 |
| 1 | ks1 | aa2 |
| 1 | ks1 | aa3 |
| 1 | ks2 | aa1 |
| 1 | ks2 | aa2 |
| 1 | ks2 | aa3 |
| 2 | dq1 | bb1 |
| 2 | dq1 | bb2 |
| 2 | dq1 | bb3 |
| 2 | dq2 | bb1 |
| 2 | dq2 | bb2 |
| 2 | dq2 | bb3 |
| 3 | ly1 | cc1 |
| 3 | ly1 | cc2 |
。。。