数据准备
-- 创建一个测试库
CREATE DATABASE dw;
-- 创建行转列测试数据表
CREATE TABLE IF NOT EXISTS dw.person
(
name string,
gender string,
age int,
hobby string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
;
-- 数据内容
jack,male,20,music
marry,female,18,dance
carry,male,22,basketball
tom,male,24,movie
jack,male,20,read
marry,female,18,run
tom,male,24,movie
LOAD DATA LOCAL INPATH '/opt/data/person.csv' INTO TABLE dw.person;
-- 创建列转行测试数据表
CREATE TABLE IF NOT EXISTS dw.user_tag
(
name string,
tags string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
;
-- 数据内容
tom,T0001/T0002/T0003
jack,T0002/T0001
marry,T0003/T0004
mac,T0002/T0005
LOAD DATA LOCAL INPATH '/opt/data/tag.csv' INTO TABLE dw.user_tag;
行转列
使用函数:concat_ws(",", collect_set(col)) 注意col字段必须为string类型
collect_set可以置换成collect_list,但是两者有点细微的区别
- collect_set:会对多行转列的字段去重
- collect_list:不会对多行转列的字段去重
-- 行转列:把同一个人的爱好使用,分隔组合,并进行去重
SELECT name, gender, age, concat_ws(",", collect_set(hobby)) AS hobbys
FROM dw.person
GROUP BY name, gender, age
;
+-----+------+---+----------+
|name |gender|age|hobbys |
+-----+------+---+----------+
|tom |male |24 |movie |
|carry|male |22 |basketball|
|jack |male |20 |music,read|
|marry|female|18 |run,dance |
+-----+------+---+----------+
-- 行转列:把同一个人的爱好使用,分隔组合,不进行去重
SELECT name, gender, age, concat_ws(",", collect_list(hobby)) AS hobbys
FROM dw.person
GROUP BY name, gender, age
;
+-----+------+---+-----------+
|name |gender|age|hobbys |
+-----+------+---+-----------+
|tom |male |24 |movie,movie|
|carry|male |22 |basketball |
|jack |male |20 |music,read |
|marry|female|18 |dance,run |
+-----+------+---+-----------+
列转行
使用函数:LATERAL VIEW explode(split(col, “/”)) tbl AS tag
参数解析
- col:需要拆分的列字段
- /:col字段分隔符
- tbl:列转行后的表别名
- tag:拆分后的字段名
SELECT name, tag
FROM dw.user_tag
LATERAL VIEW explode(split(tags, "/")) tbl AS tag
;
+-----+-----+
|name |tag |
+-----+-----+
|tom |T0001|
|tom |T0002|
|tom |T0003|
|jack |T0002|
|jack |T0001|
|marry|T0003|
|marry|T0004|
|mac |T0002|
|mac |T0005|
+-----+-----+