---- Spark result
-- Decode GROUPING__ID: convert the decimal id to binary, left-pad to one
-- character per grouped column (5 here: a..e), then read each bit position
-- to detect whether that column was aggregated in the current grouping set.
-- NOTE(review): bit meaning and bit order differ between engines/versions --
-- see the note at the bottom of the file; confirm against the Spark version
-- in use before relying on a fixed position.
SELECT
    GROUPING__ID,
    conv(GROUPING__ID, 10, 2)                         AS gid_bin,
    lpad(conv(GROUPING__ID, 10, 2), 5, '0')           AS gid_bin_padded,
    reverse(lpad(conv(GROUPING__ID, 10, 2), 5, '0'))  AS gid_bin_reversed,
    -- substring() returns a string, so compare against '1' explicitly
    -- instead of the int 1 (avoids implicit string->double coercion)
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 1, 1) = '1' THEN 'all' ELSE 'a' END AS a,
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 2, 1) = '1' THEN 'all' ELSE 'b' END AS b,
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 3, 1) = '1' THEN 'all' ELSE 'c' END AS c,
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 4, 1) = '1' THEN 'all' ELSE 'd' END AS d,
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 5, 1) = '1' THEN 'all' ELSE 'e' END AS e
FROM (
    -- single fixture row so every grouping set produces exactly one group
    SELECT '1' AS a, '1' AS b, '1' AS c, '1' AS d, '1' AS e
) AS a1
GROUP BY a, b, c, d, e
GROUPING SETS (
    (a),
    (b, c),
    (b)
);
----- Hive result
-- Same decoding as the Spark query above, run on Hive so the two engines'
-- GROUPING__ID bit conventions can be compared side by side.
-- NOTE(review): Hive changed GROUPING__ID semantics in 2.3.0 (HIVE-16102);
-- confirm which convention the target cluster uses.
SELECT
    GROUPING__ID,
    conv(GROUPING__ID, 10, 2)                         AS gid_bin,
    lpad(conv(GROUPING__ID, 10, 2), 5, '0')           AS gid_bin_padded,
    reverse(lpad(conv(GROUPING__ID, 10, 2), 5, '0'))  AS gid_bin_reversed,
    -- substring() returns a string, so compare against '1' explicitly
    -- instead of the int 1 (avoids implicit string->double coercion)
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 1, 1) = '1' THEN 'all' ELSE 'a' END AS a,
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 2, 1) = '1' THEN 'all' ELSE 'b' END AS b,
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 3, 1) = '1' THEN 'all' ELSE 'c' END AS c,
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 4, 1) = '1' THEN 'all' ELSE 'd' END AS d,
    CASE WHEN substring(lpad(conv(GROUPING__ID, 10, 2), 5, '0'), 5, 1) = '1' THEN 'all' ELSE 'e' END AS e
FROM (
    -- single fixture row so every grouping set produces exactly one group
    SELECT '1' AS a, '1' AS b, '1' AS c, '1' AS d, '1' AS e
) AS a1
GROUP BY a, b, c, d, e
GROUPING SETS (
    (a),
    (b, c),
    (b)
);
-- Idea: in both Spark and Hive, GROUPING__ID can be converted from decimal
-- to binary, and each aggregated dimension is then identified by reading
-- the corresponding bit of the binary string.
-- Caution: for an aggregated (rolled-up) dimension the bit reads "0" in
-- Spark but "1" in Hive, and the bit order is reversed: Spark assigns bits
-- to the grouped columns left-to-right, while Hive assigns them
-- right-to-left.