问题描述:
一张表:test_over(uid, title, stime, etime, date)
字段comment:主播ID,直播间标题,开始直播时间戳,结束直播时间戳,日期分区
问题:求每个主播当天最近一次直播记录和全天总的直播时长
要求: shuffle次数最少
思路:
粒度是主播;时间周期是每天;需求是两个,第一,当天最近一次直播记录,第二全天总的直播时长;要求shuffle次数最低;
数据准备:
create table test_over( uid int comment '主播ID' ,title varchar(50) comment '直播间标题' ,stime int comment'开始直播时间戳' ,etime int comment'结束直播时间戳' ,mdate date comment'日期分区' ); |
insert overwrite table test_over select 1,'直播1',1591607704,1591609704,'2020-06-08' union all select 1,'直播2',1591617204,1591621204,'2020-06-08' union all select 1,'直播3',1591626204,1591628204,'2020-06-08' union all select 1,'直播4',1591628304,1591629304,'2020-06-08' union all select 2,'直播5',1591607604,1591609588,'2020-06-08' union all select 2,'直播6',1591617208,1591621206,'2020-06-08' union all select 2,'直播7',1591626208,1591628206,'2020-06-08' union all select 2,'直播8',1591628308,1591629306,'2020-06-08' union all select 3,'直播11',1591607704,1591609704,'2020-06-09' union all select 3,'直播12',1591617204,1591621204,'2020-06-09' union all select 3,'直播13',1591626204,1591628204,'2020-06-09' union all select 3,'直播14',1591628304,1591629304,'2020-06-09' union all select 4,'直播15',1591607604,1591609588,'2020-06-09' union all select 4,'直播16',1591617208,1591621206,'2020-06-09' union all select 4,'直播17',1591626208,1591628206,'2020-06-09' union all select 4,'直播18',1591628308,1591629306,'2020-06-09' |
实现:
select * from( select uid ,title ,mdate ,sum(etime-stime)over(partition by uid,mdate) ,row_number()over(partition by uid,mdate order by etime desc)rw from test_over )a where rw=1 ; |