行转列
建表语句与数据录入
use data_warehouse_test;
CREATE TABLE IF NOT EXISTS user_order (
uid BIGINT
,order_id BIGINT
);
CREATE TABLE IF NOT EXISTS user_orders (
uid BIGINT
,order_ids STRING
);
INSERT OVERWRITE TABLE user_order VALUES
(1, 112)
,(1, 123)
,(2, 234)
,(2, 21)
,(3, 821)
;
两种实现行转列:
- 第一种实现:
concat_ws(',',COLLECT_LIST(column))
与concat_ws(',',COLLECT_SET(column))
都可实现。
区别:collect_list 不去重,collect_set 去重。
注意: column的数据类型要求是string
结论:CONCAT_WS(SEPARATOR ,collect_set(column)) =======> GROUP_CONCAT(distinct column)函数
--1.
INSERT OVERWRITE TABLE user_orders
SELECT
uid
,CONCAT_WS(',', COLLECT_SET(order_str)) AS order_list
FROM
(
SELECT uid , CAST(order_id AS STRING) AS order_str
FROM user_order
) tmp
GROUP BY uid
;
结果:
+------------------+------------------------+
| user_orders.uid | user_orders.order_list |
+------------------+------------------------+
| 1 | 112,123 |
| 2 | 234,21 |
| 3 | 821 |
+------------------+------------------------+
- 第二种实现:
group_concat函数
完整的语法如下:
group_concat([DISTINCT] 要连接的字段 [Order BY ASC/DESC 排序字段] [Separator '分隔符'])
--2.
INSERT OVERWRITE TABLE user_orders
SELECT
uid
,group_concat(distinct order_str ORDER BY order_str DESC SEPARATOR ',') AS order_list
FROM
(
SELECT uid , CAST(order_id AS STRING) AS order_str
FROM user_order
) tmp
GROUP BY uid
;
结果:
+------------------+------------------------+
| user_orders.uid | user_orders.order_list |
+------------------+------------------------+
| 1 | 123,112 |
| 2 | 234,21 |
| 3 | 821 |
+------------------+------------------------+
列转行
建表语句与数据录入
CREATE TABLE IF NOT EXISTS explode_laterview_org(
day1_num BIGINT
,day2_num BIGINT
,day3_num BIGINT
,day4_num BIGINT
,day5_num BIGINT
,day6_num BIGINT
,day7_num BIGINT
,campaign_name STRING
,campaign_id BIGINT
);
INSERT OVERWRITE TABLE explode_laterview_org VALUES
(40, 20, 10, 4, 4, 2, 1, 'zoo', 2 )
,(100, 80, 53, 40, 7, 6, 5, 'moji', 3)
;
需输出:
+------------------+--------------------+-----------+---------------+
| _u1.campaign_id | _u1.campaign_name | _u1._c2 | _u1.day1_num |
+------------------+--------------------+-----------+---------------+
| 2 | zoo | day1_num | 40 |
| 2 | zoo | day2_num | 20 |
| 2 | zoo | day3_num | 10 |
| 2 | zoo | day4_num | 4 |
| 2 | zoo | day5_num | 4 |
| 2 | zoo | day6_num | 2 |
| 2 | zoo | day7_num | 1 |
| 3 | moji | day1_num | 100 |
| 3 | moji | day2_num | 80 |
| 3 | moji | day3_num | 53 |
| 3 | moji | day4_num | 40 |
| 3 | moji | day5_num | 7 |
| 3 | moji | day6_num | 6 |
| 3 | moji | day7_num | 5 |
+------------------+--------------------+-----------+---------------+
实现列转行:使用lateral view和str_to_map
SELECT
campaign_id, campaign_name, type, num
FROM explode_laterview_org
LATERAL VIEW
EXPLODE(
STR_TO_MAP(
CONCAT(
'day1_num=',CAST (day1_num AS STRING),
'&day2_num=',CAST (day2_num AS STRING),
'&day3_num=',CAST (day3_num AS STRING),
'&day4_num=',CAST (day4_num AS STRING),
'&day5_num=',CAST (day5_num AS STRING),
'&day6_num=',CAST (day6_num AS STRING),
'&day7_num=',CAST (day7_num AS STRING)
)
,'&', '=')
) lateral_table AS type, num
;
结果:
+--------------+----------------+-----------+------+
| campaign_id | campaign_name | type | num |
+--------------+----------------+-----------+------+
| 2 | zoo | day1_num | 40 |
| 2 | zoo | day2_num | 20 |
| 2 | zoo | day3_num | 10 |
| 2 | zoo | day4_num | 4 |
| 2 | zoo | day5_num | 4 |
| 2 | zoo | day6_num | 2 |
| 2 | zoo | day7_num | 1 |
| 3 | moji | day1_num | 100 |
| 3 | moji | day2_num | 80 |
| 3 | moji | day3_num | 53 |
| 3 | moji | day4_num | 40 |
| 3 | moji | day5_num | 7 |
| 3 | moji | day6_num | 6 |
| 3 | moji | day7_num | 5 |
+--------------+----------------+-----------+------+
函数说明:
concat_ws()函数
功能:一次性指定分隔符
语法:concat_ws(separator,str1,str2,...)
说明:第一个参数指定分隔符 分隔符不能为空 如果为NULL 则返回值NULL
collect_set()函数
功能:将colname指定的列值聚合为一个无重复元素的数组
语法:collect_set(<colname>)
说明:colname:必填。表的列名称,可以为任意类型。
group_concat()函数
功能:将group by产生的同一个分组中的值连接起来 返回一个字符串结果
语法 group_concat([distinct]要连接的字段 [order by 排序字段 asc/desc][separator'分隔符'])
说明:通过使用distinct可以排除重复值 如果希望对结果中的值进行排序 可以使用order by子句 separator是一个字符串值
缺省为一个逗号
参考文章:
1.【学习笔记】hive 之行拆列explode
https://yq.aliyun.com/articles/654743
HIVE 中内置了 EXPLODE 和 LATERAL VIEW,这两个 Function 非常的有用。尤其是在行转列的情形下。
2.hive中的lateral view 与 explode函数的使用
https://blog.csdn.net/guodong2k/article/details/79459282
3.
https://blog.csdn.net/qq_34206560/article/details/88358982
4.
https://help.aliyun.com/document_detail/48975.html?utm_content=g_1000230851&spm=5176.20966629.toubu.3.f2991ddcpxxvD1#title-7zg-lre-3es