背景
如果需要对一张表进行不同维度的分析(月,天,月和天)传统SQL需要多次查询表然后在需要聚合
问题:如果这个表非常的大,加上mr执行效率可能非常的慢
解决:使用聚合函数,一次查表,然后合并
sql
--3个分组统计而已,简单。统计完再使用union all合并结果集。
--注意union all合并结果集需要各个查询返回字段个数、类型一致,因此需要合理的使用null来填充返回结果。
select month,
null,
count(cookieid)
from test.t_cookie
group by month
union all
select null,
day,
count(cookieid)
from test.t_cookie
group by day
union all
select month,
day,
count(cookieid)
from test.t_cookie
group by month,day;
grouping sets
select
month,day,count(cookieid)
from test.t_cookie
group by month,day
grouping sets (month,day,(month,day));
cuberollup
select month,day,count(cookieid)
from test.t_cookie
group by
cube (month, day);
--上述sql等价于
select month,day,count(cookieid)
from test.t_cookie
group by
grouping sets ((month,day), month, day, ());
rollup
select month,day,count(cookieid)
from test.t_cookie
group by
rollup (month,day);
--等价于
select month,day,count(cookieid)
from test.t_cookie
group by
grouping sets ((month,day), (month), ());
grouping
select month,
day,
count(cookieid),
grouping(month) as m,
grouping(day) as d,
grouping(month, day) as m_d
from test.t_cookie
group by
grouping sets (month, day, (month, day));