-- Baseline version: rebuilds partition dt = '${DT}' of the daily play-event
-- table from three filtered reads of ods.user_event_log_ex glued together
-- with union all. Each branch keeps one event_name and applies that event's
-- own validity filter.
-- NOTE(review): select * also returns the source's columns in table order; if
-- dt is a partition column of ods.user_event_log_ex it is included and may not
-- line up with the target's non-partition columns -- confirm, and prefer an
-- explicit column list.
insert overwrite table dwd.dwd_user_play_event_1d_delta_daily partition (dt = '${DT}')
-- branch 1: every play_start event of the day
select *
from ods.user_event_log_ex
where dt = '${DT}'
  and event_name = 'play_start'
union all
-- branch 2: play_end events with a positive play_time
select *
from ods.user_event_log_ex
where dt = '${DT}'
  and event_name = 'play_end'
  and play_time > 0
union all
-- branch 3: play_error events that carry an error_code
select *
from ods.user_event_log_ex
where dt = '${DT}'
  and event_name = 'play_error'
  and error_code is not null;
从上面的SQL可以看出,三段查询都是从同一张表中拉取数据,只是过滤条件不一样。但上面的写法需要对同一张表扫描三次,每个union all分支都会产生额外的MapReduce任务,都需要消耗资源和时间。可对以上SQL做如下优化,减少对表的扫描次数和MapReduce任务的产生。
-- Single-scan version: reads ods.user_event_log_ex once for dt = '${DT}' and
-- keeps a row when it satisfies any one of the three per-event rules. The
-- rules are mutually exclusive on event_name, so the result is the same row
-- set the three-branch union all produces, without duplicates.
-- A from-first multi-insert is deliberately NOT used here: three
-- "insert overwrite" clauses aimed at the same partition do not accumulate
-- rows (each overwrite replaces the partition), and Hive is expected to
-- reject a destination that is listed more than once in one multi-insert.
-- NOTE(review): select * relies on the source column order matching the
-- target's non-partition columns -- confirm, and prefer an explicit list.
insert overwrite table dwd.dwd_user_play_event_1d_delta_daily partition (dt = '${DT}')
select *
from ods.user_event_log_ex
where dt = '${DT}'
  and (
        event_name = 'play_start'
     or (event_name = 'play_end' and play_time > 0)
     or (event_name = 'play_error' and error_code is not null)
  );