数据准备
创建表
create table if not exists cookies(
cookieid string,
cookie_day string,
cookie_month string,
cookie_status bigint
)comment 'group测试'
PARTITIONED BY (ds string) lifecycle 1000;
数据
insert into cookies PARTITION(ds = '20210401')
values ('10001','2021-2-12','2021-2','1'),
('10002','2021-2-12','2021-2','2'),
('10003','2021-2-13','2021-2','1'),
('10004','2021-2-13','2021-2','2'),
('10005','2021-2-13','2021-2','3'),
('10006','2021-2-14','2021-2','1'),
('10007','2021-2-14','2021-2','2'),
('10008','2021-2-14','2021-2','3'),
('10009','2021-2-14','2021-2','4'),
('10010','2021-2-14','2021-2','5'),
('10011','2021-2-14','2021-2','6'),
('10012','2021-2-14','2021-2','7'),
('10013','2021-3-1','2021-3','1'),
('10014','2021-3-2','2021-3','1'),
('10015','2021-3-2','2021-3','2'),
('10016','2021-3-2','2021-3','3'),
('10017','2021-3-2','2021-3','4'),
('10018','2021-3-2','2021-3','5'),
('10019','2021-3-2','2021-3','6'),
('10020','2021-3-3','2021-3','1'),
('10021','2021-3-3','2021-3','2'),
('10022','2021-3-3','2021-3','3'),
('10020','2021-3-4','2021-3','1'),
('10021','2021-3-4','2021-3','2'),
('10022','2021-3-4','2021-3','3'),
('10023','2021-3-4','2021-3','4'),
('10024','2021-3-4','2021-3','5'),
('10025','2021-4-5','2021-4','1'),
('10026','2021-4-5','2021-4','2'),
('10027','2021-4-5','2021-4','3'),
('10028','2021-4-5','2021-4','4'),
('10029','2021-4-5','2021-4','5'),
('10030','2021-4-5','2021-4','6'),
('10031','2021-4-5','2021-4','7'),
('10032','2021-4-5','2021-4','8'),
('10033','2021-4-5','2021-4','9');
alter table daole_cookies_log drop partition (ds='20210401');
select * from daole_cookies_log;
(1)group by
select cookie_month,cookie_date,count(cookie_id) as total
from cookies
where ds = '20210401'
group by cookie_month,cookie_date
结果:
(2)grouping sets
在一个GROUP BY查询中,根据不同的维度组合进行聚合,等价于将不同维度的GROUP BY结果集进行UNION ALL。
group by A,B,C
grouping sets (A,B,C)
- (A)
- (B)
- (C)
group by A,B,
grouping sets (A,B,(A,B))
- (A)
- (B)
- (A)(B)
例如:
select
cookie_month,
cookie_date,
count(cookie_id) as uv
from cookies
group by cookie_month,cookie_date
grouping sets (cookie_month,cookie_date)
等价于:
SELECT cookie_month,NULL,COUNT(cookie_id) AS uv
FROM cookies
GROUP BY cookie_month
UNION ALL
SELECT NULL,cookie_date,COUNT(cookie_id) AS uv
FROM cookies
GROUP BY cookie_date
再比如:
select cookie_month,cookie_date,count(cookie_id) as total
from cookies
where ds = '20210401'
group by cookie_month,cookie_date
grouping sets (cookie_month,
cookie_date,
(cookie_month,cookie_date)
);
等价于:
SELECT cookie_month,NULL,COUNT(cookie_id) AS uv
FROM cookies GROUP BY cookie_month
UNION ALL
SELECT NULL,cookie_date,COUNT(cookieid) AS uv
FROM cookies GROUP BY cookie_date
UNION ALL
SELECT cookie_month,cookie_date,COUNT(cookieid) AS uv
FROM cookies GROUP BY cookie_month,cookie_date
(3)cube
group by A,B,C with cube
- (A、B、C)进行group by
- (A) 进行group by
- (B) 进行group by
- (C) 进行group by
- (A)(B)进行group by
- (A)(C)进行group by
- (B)(C)进行group by
- 全表sum
select cookie_month,cookie_date,count(cookie_id) as total
from cookies
where ds = '20210401'
group by cookie_month,cookie_date
with CUBE ;
(4)rollup
rollup 是根据维度在数据结果集中进行的聚合操作。
group by A,B,C with rollup
首先会对(A、B、C)进行group by,然后对(A、B)进行group by,然后是(A)进行group by,最后对全表进行group by操作。