目录
改写SQL实现
使用grouping sets代替union
-- Before optimization: two separate aggregations over student_tb_orc glued
-- together with UNION ALL.
--   branch 1: row count per (s_age, s_sex)
--   branch 2: row count per s_age alone, with s_sex padded as NULL so both
--             branches expose the same three columns
-- Branch 2 must group by s_age only; grouping it by (s_age, s_sex) would emit
-- one NULL-padded row per (age, sex) pair instead of one subtotal row per age.
SELECT
    s_age,
    s_sex,
    count(1)
FROM student_tb_orc
GROUP BY
    s_age,
    s_sex
UNION ALL
SELECT
    s_age,
    NULL AS s_sex,  -- alias keeps column names identical across both branches
    count(1)
FROM student_tb_orc
GROUP BY
    s_age;
-- After optimization: one statement produces both aggregation levels.
-- GROUPING SETS lists the levels to emit:
--   (s_age, s_sex) -> row count per age and sex
--   s_age          -> row count per age (s_sex is NULL on these rows)
SELECT
    s_age,
    s_sex,
    COUNT(*)
FROM student_tb_orc
GROUP BY
    s_age,
    s_sex
GROUPING SETS (
    (s_age, s_sex),
    s_age
);
分解count(distinct)
-- Original code: two global distinct counts over one day's events for a
-- single event action. '${DAY}' is a placeholder substituted before the
-- query runs.
SELECT
    COUNT(DISTINCT user_id),
    COUNT(DISTINCT cuid)
FROM udw.udw_event
WHERE event_action = 'searchbox_mobile_app_operate'
    AND event_day = '${DAY}';
-- Main problem with the query above: COUNT(DISTINCT ...) is handled by only
-- one reducer, which becomes a single-point bottleneck.
-- 修改后代码
SET mapred.reduce.tasks=30