(1)游标是面向过程的,普通sql语句是面向集合的。
(2)游标的扫描成本是o(n),集合的扫描成本可能是o(n的平方)。
(3)对表的一次扫描,游标的开销要远大于面向集合的方法。但是游标的优势体现在,对于一个面向集合的解决方案所需扫描成本为o(n的平方)的情况,基于游标的解决方案可能是o(n)。
(4)对于游标的使用,任何存在都是合理的,关键看能否合理利用。
下面是一个利用游标优化sql的例子:
创建测试表:
create table sessions (
id int,
app varchar(4),
usr varchar(5),
starttime datetime,
endtime datetime);
insert into sessions values(1,'app1','user1','2017-08-29 08:30:00','2017-08-29 10:30:00');
insert into sessions values(2,'app1','user2','2017-08-29 08:30:00','2017-08-29 08:45:00');
insert into sessions values(3,'app1','user1','2017-08-29 09:00:00','2017-08-29 09:30:00');
insert into sessions values(4,'app1','user2','2017-08-29 09:15:00','2017-08-29 10:30:00');
insert into sessions values(5,'app1','user1','2017-08-29 09:15:00','2017-08-29 09:30:00');
insert into sessions values(6,'app1','user2','2017-08-29 10:30:00','2017-08-29 14:30:00');
insert into sessions values(7,'app1','user1','2017-08-29 10:45:00','2017-08-29 11:30:00');
insert into sessions values(8,'app1','user2','2017-08-29 11:00:00','2017-08-29 12:30:00');
insert into sessions values(9,'app2','user1','2017-08-29 08:30:00','2017-08-29 08:45:00');
insert into sessions values(10,'app2','user2','2017-08-29 09:00:00','2017-08-29 09:30:00');
insert into sessions values(11,'app2','user1','2017-08-29 11:45:00','2017-08-29 12:00:00');
insert into sessions values(12,'app2','user2','2017-08-29 12:30:00','2017-08-29 14:00:00');
insert into sessions values(13,'app2','user1','2017-08-29 12:45:00','2017-08-29 13:30:00');
insert into sessions values(14,'app2','user2','2017-08-29 13:00:00','2017-08-29 14:00:00');
insert into sessions values(15,'app2','user1','2017-08-29 14:00:00','2017-08-29 16:30:00');
insert into sessions values(16,'app2','user2','2017-08-29 15:30:00','2017-08-29 17:00:00');
需求:如何求得某一时间段内的最大会话数。
(1)利用集合的方法:子查询。
思想:通过子查询找出某个app开始时间之前有另外的app,其开始运行时间在当前app之前,结束时间在当前app之后。
select app,max(count) as max from (
select app,s,(select count(1) from sessions as b where a.app=b.app and s>=starttime and s<endtime) as count
from (select distinct app,starttime as s from sessions) as a
) as bc
group by app;
root@localhost:employees 04:21:27> select app,s,(select count(1) from sessions as b where a.app=b.app and s>=starttime and s<endtime) as count
-> from (select distinct app,starttime as s from sessions) as a;
+------+---------------------+-------+
| app | s | count |
+------+---------------------+-------+
| app1 | 2017-08-29 08:30:00 | 3 |
| app1 | 2017-08-29 09:00:00 | 3 |
| app1 | 2017-08-29 09:15:00 | 5 |
| app1 | 2017-08-29 10:30:00 | 1 |
| app1 | 2017-08-29 10:45:00 | 2 |
| app1 | 2017-08-29 11:00:00 | 3 |
| app2 | 2017-08-29 08:30:00 | 1 |
| app2 | 2017-08-29 09:00:00 | 1 |
| app2 | 2017-08-29 11:45:00 | 1 |
| app2 | 2017-08-29 12:30:00 | 1 |
| app2 | 2017-08-29 12:45:00 | 2 |
| app2 | 2017-08-29 13:00:00 | 3 |
| app2 | 2017-08-29 14:00:00 | 1 |
| app2 | 2017-08-29 15:30:00 | 2 |
+------+---------------------+-------+
14 rows in set (0.01 sec)
root@localhost:employees 04:21:49> select app,max(count) as max from (
-> select app,s,(select count(1) from sessions as b where a.app=b.app and s>=starttime and s<endtime) as count
-> from (select distinct app,starttime as s from sessions) as a
-> ) as bc
-> group by app;
+------+------+
| app | max |
+------+------+
| app1 | 5 |
| app2 | 3 |
+------+------+
2 rows in set (0.00 sec)
explain extended
select app,max(count) as max from (
select app,s,(select count(1) from sessions as b where a.app=b.app and s>=starttime and s<endtime) as count
from (select distinct app,starttime as s from sessions) as a
) as bc
group by app;
+----+--------------------+------------+------+---------------+------+---------+------+------+----------+---------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+------------+------+---------------+------+---------+------+------+----------+---------------------------------+
| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 17 | 100.00 | Using temporary; Using filesort |
| 2 | DERIVED | <derived4> | ALL | NULL | NULL | NULL | NULL | 17 | 100.00 | NULL |
| 4 | DERIVED | sessions | ALL | NULL | NULL | NULL | NULL | 17 | 100.00 | Using temporary |
| 3 | DEPENDENT SUBQUERY | b | ALL | NULL | NULL | NULL | NULL | 17 | 100.00 | Using where |
+----+--------------------+------------+------+---------------+------+---------+------+------+----------+---------------------------------+
从执行计划中可以发现,该sql语句先通过select distinct app,starttime as a from sessions产生一张派生表<derived4>。
然后该派生表与session表再进行相关子查询,求得派生表<derived2>
最后在派生表<derived2>上进行排序分组,求得最终结果。
这里相关子查询部分的扫描成本为o(n的平方)。
(2)利用游标解决
声明游标的语句:
select app,starttime as time,1 as type from sessions
union all
select app,endtime,-1 from sessions order by app,time,type desc;
+------+---------------------+------+
| app | time | type |
+------+---------------------+------+
| app1 | 2017-08-29 08:30:00 | 1 |
| app1 | 2017-08-29 08:30:00 | 1 |
| app1 | 2017-08-29 08:30:00 | 1 |
| app1 | 2017-08-29 08:45:00 | -1 |
| app1 | 2017-08-29 09:00:00 | 1 |
| app1 | 2017-08-29 09:15:00 | 1 |
| app1 | 2017-08-29 09:15:00 | 1 |
| app1 | 2017-08-29 09:30:00 | -1 |
| app1 | 2017-08-29 09:30:00 | -1 |
| app1 | 2017-08-29 10:30:00 | 1 |
| app1 | 2017-08-29 10:30:00 | -1 |
| app1 | 2017-08-29 10:30:00 | -1 |
| app1 | 2017-08-29 10:30:00 | -1 |
| app1 | 2017-08-29 10:45:00 | 1 |
| app1 | 2017-08-29 11:00:00 | 1 |
| app1 | 2017-08-29 11:30:00 | -1 |
| app1 | 2017-08-29 12:30:00 | -1 |
| app1 | 2017-08-29 14:30:00 | -1 |
| app2 | 2017-08-29 08:30:00 | 1 |
| app2 | 2017-08-29 08:45:00 | -1 |
| app2 | 2017-08-29 09:00:00 | 1 |
| app2 | 2017-08-29 09:30:00 | -1 |
| app2 | 2017-08-29 11:45:00 | 1 |
| app2 | 2017-08-29 12:00:00 | -1 |
| app2 | 2017-08-29 12:30:00 | 1 |
| app2 | 2017-08-29 12:45:00 | 1 |
| app2 | 2017-08-29 13:00:00 | 1 |
| app2 | 2017-08-29 13:30:00 | -1 |
| app2 | 2017-08-29 14:00:00 | 1 |
| app2 | 2017-08-29 14:00:00 | -1 |
| app2 | 2017-08-29 14:00:00 | -1 |
| app2 | 2017-08-29 15:30:00 | 1 |
| app2 | 2017-08-29 16:30:00 | -1 |
| app2 | 2017-08-29 17:00:00 | -1 |
+------+---------------------+------+
34 rows in set (0.00 sec)
该查询为每个会话按照开始时间和结束时间分别生成不同类型的事件,然后再进行union all的集合操作。
这样最大会话数问题可以转换为游标对获取每行时sum(type)的最大值。
delimiter //
create procedure cur_max_sessions()
begin
declare done int default 0;
declare _app varchar(10);
declare _prev_app varchar(10);
declare _time datetime;
declare _type int;
declare _current int;
declare _max int;
declare cur1 cursor for
select app,starttime as time,1 as type from sessions
union all
select app,endtime,-1 from sessions order by app,time,type desc;
declare continue handler for not found set done=1;
drop temporary table if exists ret;
create temporary table ret(
app varchar(10),
max int
) engine=memory;
open cur1;
fetch cur1 into _app,_time,_type;
set _prev_app=_app;
set _current=0;
set _max=0;
while done=0 do
begin
if _prev_app<>_app then
insert into ret select _prev_app,_max;
set _prev_app=_app;
set _max=0;
set _current=0;
end if;
set _current=_current+_type;
if _current>_max then
set _max=_current;
end if;
fetch cur1 into _app,_time,_type;
end;
end while;
if _prev_app is not null then
insert into ret select _prev_app,_max;
end if;
select * from ret;
drop table ret;
end;
//
delimiter ;
root@localhost:employees 05:24:09> call cur_max_sessions();
+------+------+
| app | max |
+------+------+
| app1 | 5 |
| app2 | 3 |
+------+------+
2 rows in set (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
(2)游标的扫描成本是o(n),集合的扫描成本可能是o(n的平方)。
(3)对表的一次扫描,游标的开销要远大于面向集合的方法。但是游标的优势体现在,对于一个面向集合的解决方案所需扫描成本为o(n的平方)的情况,基于游标的解决方案可能是o(n)。
(4)对于游标的使用,任何存在都是合理的,关键看能否合理利用。
下面是一个利用游标优化sql的例子:
创建测试表:
create table sessions (
id int,
app varchar(4),
usr varchar(5),
starttime datetime,
endtime datetime);
insert into sessions values(1,'app1','user1','2017-08-29 08:30:00','2017-08-29 10:30:00');
insert into sessions values(2,'app1','user2','2017-08-29 08:30:00','2017-08-29 08:45:00');
insert into sessions values(3,'app1','user1','2017-08-29 09:00:00','2017-08-29 09:30:00');
insert into sessions values(4,'app1','user2','2017-08-29 09:15:00','2017-08-29 10:30:00');
insert into sessions values(5,'app1','user1','2017-08-29 09:15:00','2017-08-29 09:30:00');
insert into sessions values(6,'app1','user2','2017-08-29 10:30:00','2017-08-29 14:30:00');
insert into sessions values(7,'app1','user1','2017-08-29 10:45:00','2017-08-29 11:30:00');
insert into sessions values(8,'app1','user2','2017-08-29 11:00:00','2017-08-29 12:30:00');
insert into sessions values(9,'app2','user1','2017-08-29 08:30:00','2017-08-29 08:45:00');
insert into sessions values(10,'app2','user2','2017-08-29 09:00:00','2017-08-29 09:30:00');
insert into sessions values(11,'app2','user1','2017-08-29 11:45:00','2017-08-29 12:00:00');
insert into sessions values(12,'app2','user2','2017-08-29 12:30:00','2017-08-29 14:00:00');
insert into sessions values(13,'app2','user1','2017-08-29 12:45:00','2017-08-29 13:30:00');
insert into sessions values(14,'app2','user2','2017-08-29 13:00:00','2017-08-29 14:00:00');
insert into sessions values(15,'app2','user1','2017-08-29 14:00:00','2017-08-29 16:30:00');
insert into sessions values(16,'app2','user2','2017-08-29 15:30:00','2017-08-29 17:00:00');
需求:如何求得某一时间段内的最大会话数。
(1)利用集合的方法:子查询。
思想:通过子查询找出某个app开始时间之前有另外的app,其开始运行时间在当前app之前,结束时间在当前app之后。
select app,max(count) as max from (
select app,s,(select count(1) from sessions as b where a.app=b.app and s>=starttime and s<endtime) as count
from (select distinct app,starttime as s from sessions) as a
) as bc
group by app;
root@localhost:employees 04:21:27> select app,s,(select count(1) from sessions as b where a.app=b.app and s>=starttime and s<endtime) as count
-> from (select distinct app,starttime as s from sessions) as a;
+------+---------------------+-------+
| app | s | count |
+------+---------------------+-------+
| app1 | 2017-08-29 08:30:00 | 3 |
| app1 | 2017-08-29 09:00:00 | 3 |
| app1 | 2017-08-29 09:15:00 | 5 |
| app1 | 2017-08-29 10:30:00 | 1 |
| app1 | 2017-08-29 10:45:00 | 2 |
| app1 | 2017-08-29 11:00:00 | 3 |
| app2 | 2017-08-29 08:30:00 | 1 |
| app2 | 2017-08-29 09:00:00 | 1 |
| app2 | 2017-08-29 11:45:00 | 1 |
| app2 | 2017-08-29 12:30:00 | 1 |
| app2 | 2017-08-29 12:45:00 | 2 |
| app2 | 2017-08-29 13:00:00 | 3 |
| app2 | 2017-08-29 14:00:00 | 1 |
| app2 | 2017-08-29 15:30:00 | 2 |
+------+---------------------+-------+
14 rows in set (0.01 sec)
root@localhost:employees 04:21:49> select app,max(count) as max from (
-> select app,s,(select count(1) from sessions as b where a.app=b.app and s>=starttime and s<endtime) as count
-> from (select distinct app,starttime as s from sessions) as a
-> ) as bc
-> group by app;
+------+------+
| app | max |
+------+------+
| app1 | 5 |
| app2 | 3 |
+------+------+
2 rows in set (0.00 sec)
explain extended
select app,max(count) as max from (
select app,s,(select count(1) from sessions as b where a.app=b.app and s>=starttime and s<endtime) as count
from (select distinct app,starttime as s from sessions) as a
) as bc
group by app;
+----+--------------------+------------+------+---------------+------+---------+------+------+----------+---------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+------------+------+---------------+------+---------+------+------+----------+---------------------------------+
| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 17 | 100.00 | Using temporary; Using filesort |
| 2 | DERIVED | <derived4> | ALL | NULL | NULL | NULL | NULL | 17 | 100.00 | NULL |
| 4 | DERIVED | sessions | ALL | NULL | NULL | NULL | NULL | 17 | 100.00 | Using temporary |
| 3 | DEPENDENT SUBQUERY | b | ALL | NULL | NULL | NULL | NULL | 17 | 100.00 | Using where |
+----+--------------------+------------+------+---------------+------+---------+------+------+----------+---------------------------------+
从执行计划中可以发现,该sql语句先通过select distinct app,starttime as a from sessions产生一张派生表<derived4>。
然后该派生表与session表再进行相关子查询,求得派生表<derived2>
最后在派生表<derived2>上进行排序分组,求得最终结果。
这里相关子查询部分的扫描成本为o(n的平方)。
(2)利用游标解决
声明游标的语句:
select app,starttime as time,1 as type from sessions
union all
select app,endtime,-1 from sessions order by app,time,type desc;
+------+---------------------+------+
| app | time | type |
+------+---------------------+------+
| app1 | 2017-08-29 08:30:00 | 1 |
| app1 | 2017-08-29 08:30:00 | 1 |
| app1 | 2017-08-29 08:30:00 | 1 |
| app1 | 2017-08-29 08:45:00 | -1 |
| app1 | 2017-08-29 09:00:00 | 1 |
| app1 | 2017-08-29 09:15:00 | 1 |
| app1 | 2017-08-29 09:15:00 | 1 |
| app1 | 2017-08-29 09:30:00 | -1 |
| app1 | 2017-08-29 09:30:00 | -1 |
| app1 | 2017-08-29 10:30:00 | 1 |
| app1 | 2017-08-29 10:30:00 | -1 |
| app1 | 2017-08-29 10:30:00 | -1 |
| app1 | 2017-08-29 10:30:00 | -1 |
| app1 | 2017-08-29 10:45:00 | 1 |
| app1 | 2017-08-29 11:00:00 | 1 |
| app1 | 2017-08-29 11:30:00 | -1 |
| app1 | 2017-08-29 12:30:00 | -1 |
| app1 | 2017-08-29 14:30:00 | -1 |
| app2 | 2017-08-29 08:30:00 | 1 |
| app2 | 2017-08-29 08:45:00 | -1 |
| app2 | 2017-08-29 09:00:00 | 1 |
| app2 | 2017-08-29 09:30:00 | -1 |
| app2 | 2017-08-29 11:45:00 | 1 |
| app2 | 2017-08-29 12:00:00 | -1 |
| app2 | 2017-08-29 12:30:00 | 1 |
| app2 | 2017-08-29 12:45:00 | 1 |
| app2 | 2017-08-29 13:00:00 | 1 |
| app2 | 2017-08-29 13:30:00 | -1 |
| app2 | 2017-08-29 14:00:00 | 1 |
| app2 | 2017-08-29 14:00:00 | -1 |
| app2 | 2017-08-29 14:00:00 | -1 |
| app2 | 2017-08-29 15:30:00 | 1 |
| app2 | 2017-08-29 16:30:00 | -1 |
| app2 | 2017-08-29 17:00:00 | -1 |
+------+---------------------+------+
34 rows in set (0.00 sec)
该查询为每个会话按照开始时间和结束时间分别生成不同类型的事件,然后再进行union all的集合操作。
这样最大会话数问题可以转换为游标对获取每行时sum(type)的最大值。
delimiter //
create procedure cur_max_sessions()
begin
declare done int default 0;
declare _app varchar(10);
declare _prev_app varchar(10);
declare _time datetime;
declare _type int;
declare _current int;
declare _max int;
declare cur1 cursor for
select app,starttime as time,1 as type from sessions
union all
select app,endtime,-1 from sessions order by app,time,type desc;
declare continue handler for not found set done=1;
drop temporary table if exists ret;
create temporary table ret(
app varchar(10),
max int
) engine=memory;
open cur1;
fetch cur1 into _app,_time,_type;
set _prev_app=_app;
set _current=0;
set _max=0;
while done=0 do
begin
if _prev_app<>_app then
insert into ret select _prev_app,_max;
set _prev_app=_app;
set _max=0;
set _current=0;
end if;
set _current=_current+_type;
if _current>_max then
set _max=_current;
end if;
fetch cur1 into _app,_time,_type;
end;
end while;
if _prev_app is not null then
insert into ret select _prev_app,_max;
end if;
select * from ret;
drop table ret;
end;
//
delimiter ;
root@localhost:employees 05:24:09> call cur_max_sessions();
+------+------+
| app | max |
+------+------+
| app1 | 5 |
| app2 | 3 |
+------+------+
2 rows in set (0.00 sec)
Query OK, 0 rows affected (0.00 sec)