聚合因子:因为索引有顺序,表的数据也有顺序,假如表的数据顺序和索引的顺序非常不匹配,就会产生大量的聚合因子,非常影响性能。
--colocated表根据x列有一定的物理顺序
drop table colocated purge;
create table colocated ( x int, y varchar2(80) );
begin
for i in 1 .. 100000
loop
insert into colocated(x,y)
values (i, rpad(dbms_random.random,75,'*') );
end loop;
end;
/
alter table colocated
add constraint colocated_pk
primary key(x);
begin
dbms_stats.gather_table_stats( user, 'COLOCATED', cascade=>true );
end;
/
--disorganized 表数据根据x列完全无序
drop table disorganized purge;
create table disorganized
as
select x,y
from colocated
order by y;
alter table disorganized
add constraint disorganized_pk
primary key (x);
begin
dbms_stats.gather_table_stats( user, 'DISORGANIZED', cascade=>true );
end;
/
set autotrace off
alter session set statistics_level=all;
set linesize 1000
---两者性能差异显著
select /*+ index( colocated colocated_pk ) */ * from colocated where x between 20000 and 40000;
SELECT * FROM table(dbms_xplan.display_cursor(NULL,NULL,'runstats_last'));
------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 20001 |00:00:00.05 | 2900 |
| 1 | TABLE ACCESS BY INDEX ROWID| COLOCATED | 1 | 20002 | 20001 |00:00:00.05 | 2900 |
|* 2 | INDEX RANGE SCAN | COLOCATED_PK | 1 | 20002 | 20001 |00:00:00.03 | 1375 |
------------------------------------------------------------------------------------------------------
select /*+ index( disorganized disorganized_pk ) */* from disorganized where x between 20000 and 40000;
SELECT * FROM table(dbms_xplan.display_cursor(NULL,NULL,'runstats_last'));
---------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 20001 |00:00:00.09 | 21360 |
| 1 | TABLE ACCESS BY INDEX ROWID| DISORGANIZED | 1 | 20002 | 20001 |00:00:00.09 | 21360 |
|* 2 | INDEX RANGE SCAN | DISORGANIZED_PK | 1 | 20002 | 20001 |00:00:00.03 | 1375 |
---------------------------------------------------------------------------------------------------------
---看聚合因子,就明白真正的原因了。
select a.index_name,
b.num_rows,
b.blocks,
a.clustering_factor
from user_indexes a, user_tables b
where index_name in ('COLOCATED_PK', 'DISORGANIZED_PK' )
and a.table_name = b.table_name;
INDEX_NAME NUM_ROWS BLOCKS CLUSTERING_FACTOR
------------------------------ ---------- ---------- -----------------
COLOCATED_PK 100000 1252 1190
DISORGANIZED_PK 100000 1219 99899
解析:在这里10w个里面有1190 和99.。等个不符合顺序的记录,数据越大,性能越差。