经常会遇到取一组数据分组后最大(小)值的行,以前常用Rank 和Partition by,我想了下换个方法其实还可以,没有测试性能如何.
create table test
(
col1 number,
col2 varchar2(20),
col3 number
);
insert into test
select 1,'content',2 from dual;
insert into test
select 1,'content2',3 from dual;
insert into test
select 1,'content2',4 from dual;
insert into test
select 2,'content',1 from dual;
insert into test
select 2,'content2',30 from dual;
insert into test
select 2,'content2',4 from dual;
insert into test
select 3,'content',10 from dual;
insert into test
select 3,'content2',3 from dual;
insert into test
select 3,'content2',4 from dual;
commit ;
rank---partition by的写法
select * from
(
select rank() over (partition by col1 order by col3 desc) rn,
a.*
from test a
)X
where rn=1;
现在使用max函数也行
select * from test a
where col3 in
(
select max(col3) from test b
where a.col1=b.col1
);
不知道对于大数据量到底如何测试下数据中一个表
select count(*) from form_action_log
COUNT(*) 9903874
用Rank
select count(*) from
(
select rank() over (partition by a.form_id order by a.action_time desc) rn,
a.*
from form_action_log a
)X
where rn=1;
------70.125s result COUNT(*) 4248095
用MAX1
select count(*) from form_action_log a
where a.action_time in
(
select max(b.action_time) from form_action_log b
where a.form_id=b.form_id
);
---326.000s COUNT(*) 4248095
用MAX2
select count(*) from form_action_log a
where a.action_time>=
(
select max(b.action_time) from form_action_log b
where a.form_id=b.form_id
);
<60s COUNT(*) 4248095
不过奇怪 看到执行计划上面用Rank的方法比用Max1,Cost/IO Cost要大 执行time多久,
但实际结果时间好像不一样.(PL/Sql developer7.0上测试的结果)
改写了下SQL MAX2快一点了
以后再想想怎么改写吧.......
就到这里了