已经说好不玩了.还是不能罢手.
之前极致优化的方法,是先计算最小区间,再计算最小区间上的用户数量.
其实可以一边计算最小区间,同时计算最小区间上的用户数量.
到了这个MySQL的版本,确实已经非常的晦涩难懂了.
由于MySQL没有提供开窗函数,这种复杂计算非常的吃亏.
这个最后的版本,执行时间在 889毫秒到921毫秒之间.
MySQL 版本 最后还能优化小100毫秒,非常的不容易.
这个版本过于晦涩,我用同样的思路,写了一个Oracle版本的.
性能好,可读性还强.
不像MySQL,需要考虑所有的细节.这个同样算法的Oracle版本,随随便便就能跑到213毫秒.
之前极致优化的方法,是先计算最小区间,再计算最小区间上的用户数量.
其实可以一边计算最小区间,同时计算最小区间上的用户数量.
- DELIMITER $$
- CREATE DEFINER=`root`@`localhost` PROCEDURE `p`()
- BEGIN
- drop table if exists t1;
- drop table if exists tmp_time_point;
- drop table if exists tmp_s;
- CREATE temporary TABLE `t1` (
- `roomid` int(11) NOT NULL DEFAULT '0',
- `userid` bigint(20) NOT NULL DEFAULT '0',
- `s` timestamp,
- `e` timestamp,
- primary key(roomid,userid,s,e)
- ) ENGINE=memory;
- create temporary table tmp_time_point(
- roomid bigint,
- timepoint timestamp,
- type smallint,
- key(roomid,timepoint)
- ) engine=memory;
- create temporary table tmp_s(
- roomid bigint,
- userid bigint,
- s timestamp,
- e timestamp,
- i int
- ) engine=memory;
- SET @A=0;
- SET @B=0;
- insert into tmp_s
- SELECT x.roomid,x.userid,s,e,datediff(e,s)+1 i
- FROM
- (
- (
- SELECT @B:=@B+1 AS id,roomid,userid,s
- FROM (
- SELECT DISTINCT roomid, userid, roomstart AS s
- FROM u_room_log a
- WHERE NOT EXISTS (SELECT *
- FROM u_room_log b
- WHERE a.roomid = b.roomid
- AND a.userid = b.userid
- AND a.roomstart > b.roomstart
- AND a.roomstart <= b.roomend)
- ) AS p
- ) AS x,
- (
- SELECT @A:=@A+1 AS id,roomid,userid,e
- FROM
- (
- SELECT DISTINCT roomid, userid, roomend AS e
- FROM u_room_log a
- WHERE NOT EXISTS (SELECT *
- FROM u_room_log b
- WHERE a.roomid = b.roomid
- AND a.userid = b.userid
- AND a.roomend >= b.roomstart
- AND a.roomend < b.roomend)
- ) AS o
- ) AS y
- )
- WHERE x.id = y.id AND x.roomid = y.roomid AND x.userid = y.userid ;
- select max(i) into @c from tmp_s;
- insert ignore into t1(roomid,userid,s,e)
- select
- roomid, userid,
- if(date(s)!=date(e) and id>1,date(s+interval id-1 day),s) s,
- if(date(s+interval id-1 day)=date(e) ,e,date_format(s+interval id-1 day,'%Y-%m-%d 23:59:59')) e
- from tmp_s t1 STRAIGHT_JOIN
- nums on(nums.id<=t1.i)
- where nums.id<=@c
- ;
- -- 开始点+1,结束点-1
- insert into tmp_time_point(roomid,timepoint,type) select roomid,s,1 from t1;
- insert into tmp_time_point(roomid,timepoint,type) select roomid,e,-1 from t1;
- select roomid,date(s) dt,round(sum(timestampdiff(second,date_format(s,'%Y-%m-%d %H:%i:%s'),date_format(e,'%Y-%m-%d %H:%i:%s')))/60) ts,max(rn) c from (
- select
- if(@roomid=roomid,@d,'') as s,@d:=str_to_date(timepoint,'%Y-%m-%d %H:%i:%s.%f'),@roomid:=roomid,p.roomid,str_to_date(timepoint,'%Y-%m-%d %H:%i:%s.%f') e ,rn
- from
- (
- select round(case when @roomid=roomid then @rn:=@rn+prevType when @roomid:=roomid then @rn:=prevType end) rn,b.prevType,roomid,timepoint,type from (
- select if(@roomid=roomid,@type,0) prevType ,case when @roomid=roomid then @type:=type when @roomid:=roomid then @type:=1 end,a.roomid,timepoint,type from (
- select * from ( select roomid,timepoint,sum(type) type from tmp_time_point group by roomid,timepoint) tmp_time_point,(select @roomid:=-1,@rn:=0,@type:=0) vars order by roomid ,timepoint
- ) a
- ) b order by roomid ,timepoint
- )p,(select @d:='',@roomid:=-1) vars
- order by roomid,timepoint
- ) v4 where s!='' and date(s)=date(e) and rn>=2
- group by roomid,date(s);
- END
到了这个MySQL的版本,确实已经非常的晦涩难懂了.
由于MySQL没有提供开窗函数,这种复杂计算非常的吃亏.
这个最后的版本,执行时间在 889毫秒到921毫秒之间.
MySQL 版本 最后还能优化小100毫秒,非常的不容易.
这个版本过于晦涩,我用同样的思路,写了一个Oracle版本的.
性能好,可读性还强.
- with c1 as -- 合并同一房间同一用户的重叠时间段,用于统计峰值人数
- (
- select distinct roomid,userid,min(s) s,max(e) e
- from (select roomid,userid,s,e,
- sum(broken) over (partition by roomid, userid order by s,e) flag
- from (select t.*,
- (case when s <= max(e) over (partition by roomid, userid order by s,e rows between unbounded preceding and 1 preceding) then 0
- else 1
- end) as broken
- from (select roomid,userid,roomstart s,roomend e from u_room_log ) t
- ) t
- ) t
- group by roomid,userid,flag
- ),
- c2 as -- 拆分跨天的时间段
- (
- select *
- from (select roomid,userid,s,e
- from c1
- where trunc(s) = trunc(e) -- 不跨天
- union all
- select roomid,userid,
- case when id = 1 then s else trunc(s)+id-1 end s,
- case when id = m2 then e else (trunc(s)+id) -(1/24/60/60) end e
- from (select roomid,userid,s,e,id,
- max(id) over (partition by roomid,userid,s) m2
- from c1,nums
- where trunc(s) <> trunc(e) -- 跨天
- and id <= trunc(e)-trunc(s)+1) t1) t1
- ),
- c3 as -- 在计算最小范围的同时,计算区间用户数
- (
- select roomid,ts endtime,sum(prevType) over(partition by roomid order by ts) rn,
- lag(ts) over (partition by roomid order by ts) starttime
- from (
- select a.*,nvl(lag(type) over (partition by roomid order by ts),0) prevType
- from (
- select
- roomid,ts,sum(type) type
- from (
- select roomid,e ts, -1 type
- from c2
- union all
- select roomid,s ts, 1 type
- from c2
- ) group by roomid,ts
- ) a
- ) c
- )
- select roomid,to_char(dt,'yyyy-mm-dd HH24:mi:ss') dt,round(sum(dur)/60) ts,max(rn) c from (
- select roomid,trunc(starttime) dt,(endtime-starttime)*3600*24 dur,rn
- from c3 where rn>=2 and trunc(endtime)=trunc(starttime) and starttime is not null
- order by roomid,endtime
- )
- group by roomid,dt
- order by roomid,dt;
不像MySQL,需要考虑所有的细节.这个同样算法的Oracle版本,随随便便就能跑到213毫秒.
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/29254281/viewspace-2150464/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/29254281/viewspace-2150464/