在项目中,我们经常遇到或用到分页,那么在大数据量(百万级以上)下,哪种分页算法效率最优呢?我们不妨用事实说话。
测试环境
硬件:CPU 酷睿双核T5750 内存:2G
软件:Windows server 2003 + Sql server 2005
OK,我们首先创建一数据库:data_Test,并在此数据库中创建一表:tb_TestTable
create database data_Test --创建数据库data_Test
GO
use data_Test
GO
create table tb_TestTable --创建表
(
id int identity(1,1) primary key,
userName nvarchar(20) not null,
userPWD nvarchar(20) not null,
userEmail nvarchar(40) null
)
GO
然后我们在数据表中插入2000000条数据:
--插入数据
set identity_insert tb_TestTable on
declare @count int
set @count=1
while @count<=2000000
begin
insert into tb_TestTable(id,userName,userPWD,userEmail) values
(@count,'admin','admin888','lli0077@yahoo.com.cn')
set @count=@count+1
end
set identity_insert tb_TestTable off
我首先写了五个常用存储过程:
1,利用select top 和select not in进行分页,具体代码如下:
create procedure proc_paged_with_notin --利用select top and select not in
(
@pageIndex int, --页索引
@pageSize int --每页记录数
)
as
begin
set nocount on;
declare @timediff datetime --耗时
declare @sql nvarchar(500)
select @timediff=Getdate()
set @sql='select top '+str(@pageSize)+' * from tb_TestTable where(ID not in(select top '+str(@pageSize*@pageIndex)+' id from tb_TestTable order by ID ASC)) order by ID'
execute(@sql) --因select top后不支技直接接参数,所以写成了字符串@sql
select datediff(ms,@timediff,GetDate()) as 耗时
set nocount off;
end
2,利用select top 和 select max(列键)
create procedure proc_paged_with_selectMax --利用select top and select max(列)
(
@pageIndex int, --页索引
@pageSize int --页记录数
)
as
begin
set nocount on;
declare @timediff datetime
declare @sql nvarchar(500)
select @timediff=Getdate()
set @sql='select top '+str(@pageSize)+' * From tb_TestTable where(ID>(select max(id) From (select top '+str(@pageSize*@pageIndex)+' id From tb_TestTable order by ID) as TempTable)) order by ID'
execute(@sql)
select datediff(ms,@timediff,GetDate()) as 耗时
set nocount off;
end
3,利用select top和中间变量--此方法因网上有人说效果最佳,所以贴出来一同测试
create procedure proc_paged_with_Midvar --利用ID>最大ID值和中间变量
(
@pageIndex int,
@pageSize int
)
as
declare @count int
declare @ID int
declare @timediff datetime
declare @sql nvarchar(500)
begin
set nocount on;
select @count=0,@ID=0,@timediff=getdate()
select @count=@count+1,@ID=case when @count<=@pageSize*@pageIndex then ID else @ID end from tb_testTable order by id
set @sql='select top '+str(@pageSize)+' * from tb_testTable where ID>'+str(@ID)
execute(@sql)
select datediff(ms,@timediff,getdate()) as 耗时
set nocount off;
end
4,利用Row_number() 此方法为SQL server 2005中新的方法,利用Row_number()给数据行加上索引
create procedure proc_paged_with_Rownumber --利用SQL 2005中的Row_number()
(
@pageIndex int,
@pageSize int
)
as
declare @timediff datetime
begin
set nocount on;
select @timediff=getdate()
select * from (select *,Row_number() over(order by ID asc) as IDRank from tb_testTable) as IDWithRowNumber where IDRank>@pageSize*@pageIndex and IDRank<@pageSize*(@pageIndex+1)
select datediff(ms,@timediff,getdate()) as 耗时
set nocount off;
end
5,利用临时表及Row_number
create procedure proc_CTE --利用临时表及Row_number
(
@pageIndex int, --页索引
@pageSize int --页记录数
)
as
set nocount on;
declare @ctestr nvarchar(400)
declare @strSql nvarchar(400)
declare @datediff datetime
begin
select @datediff=GetDate()
set @ctestr='with Table_CTE as
(select ceiling((Row_number() over(order by ID ASC))/'+str(@pageSize)+') as page_num,* from tb_TestTable)';
set @strSql=@ctestr+' select * From Table_CTE where page_num='+str(@pageIndex)
end
begin
execute sp_executesql @strSql
select datediff(ms,@datediff,GetDate())
set nocount off;
end
OK,至此,存储过程创建完毕,我们分别在每页10条数据的情况下在第2页,第1000页,第10000页,第100000页,第199999页进行测试,耗时单位:ms 每页测试5次取其平均值
存过 | 第2页耗时 | 第1000页 | 第10000页 | 第100000页 | 第199999页 | 效率排行 |
1用not in | 0ms | 16ms | 47ms | 475ms | 953ms | 3 |
2用select max | 5ms | 16ms | 35ms | 325ms | 623ms | 1 |
3中间变量 | 966ms | 970ms | 960ms | 945ms | 933ms | 5 |
4row_number | 0ms | 0ms | 34ms | 365ms | 710ms | 2 |
5临时表 | 780ms | 796ms | 798ms | 780ms | 805ms | 4 |
测试结果显示:
select max >row_number>not in>临时表>中间变量
输出窗口 |
其它
分页存储过程select max 扩展
create procedure proc_paged_with_selectMax --利用select top and select max(列)
(
@pageIndex int, --页索引
@pageSize int --页记录数
)
as
begin
set nocount on;
declare @timediff datetime
declare @sql nvarchar(500)
select @timediff=Getdate()
SELECT @count=rows FROM sysindexes WHERE id=OBJECT_ID ('tb_TestTable')
if(@pageSize*@pageIndex>@count/2) begin
set @sql='
select * from (
select top '+str(@pageSize)+' * from tb_TestTable where
(ID < (select min(ID) from
(select top '+str(@count-@pageSize*@pageIndex-@pageSize)+' ID from tb_TestTable order by ID desc)
as T)) order by ID desc
) as T2 order by ID'
end
else begin
set @sql='
select top '+str(@pageSize)+' * From tb_TestTable where
(ID>(select max(ID) From
(select top '+str(@pageSize*@pageIndex)+' ID From tb_TestTable order by ID)
as TempTable)) order by ID'
end
execute(@sql)
select datediff(ms,@timediff,GetDate()) as 耗时
set nocount off;
end
分页存储过程
create PROCEDURE GetRecordFromPage
@tblName varchar(255), -- 表名
@fldName varchar(255), -- 字段名以主键值
@PageSize int = 10, -- 页尺寸
@PageIndex int = 2, -- 页码
@OrderType bit = 0, -- 设置排序类型, 非 0 值则降序
@strWhere varchar(2000) = '' -- 查询条件 (注意: 不要加 where)
AS
declare @strSQL varchar(6000) -- 主语句
declare @strTmp varchar(5000) -- 临时变量
declare @strOrder varchar(500) -- 排序类型
if @OrderType != 0
begin
set @strTmp = '<(select min'
set @strOrder = ' order by [' + @fldName +'] desc'
end
else
begin
set @strTmp = '>(select max'
set @strOrder = ' order by [' + @fldName +'] asc'
end
set @strSQL = 'select top ' + str(@PageSize) + ' * from ['
+ @tblName + '] where [' + @fldName + ']' + @strTmp + '(['
+ @fldName + ']) from (select top ' + str((@PageIndex-1)*@PageSize) + ' ['
+ @fldName + '] from [' + @tblName + ']' + @strOrder + ') as tblTmp)'
+ @strOrder
if @strWhere != ''
set @strSQL = 'select top ' + str(@PageSize) + ' * from ['
+ @tblName + '] where [' + @fldName + ']' + @strTmp + '(['
+ @fldName + ']) from (select top ' + str((@PageIndex-1)*@PageSize) + ' ['
+ @fldName + '] from [' + @tblName + '] where ' + @strWhere + ' '
+ @strOrder + ') as tblTmp) and ' + @strWhere + ' ' + @strOrder
if @PageIndex = 1
begin
set @strTmp = ' '
if @strWhere != ''
set @strTmp = ' where (' + @strWhere + ')'
set @strSQL = 'select top ' + str(@PageSize) + ' * from ['
+ @tblName + ']' + @strTmp + ' '+ @strOrder
end
--print @strSQL
exec (@strSQL)
GO
参考: www.cnblogs.com/sdjxcolin/archive/2009/01/04/1368037.html