2013-08-09 星期五
------------直方图对执行计划影响的解决办法-----------------
SQL> create table tt as select 1 id,object_name from all_objects;
Table created.
SQL> update tt set id=99 where rownum=1; --让ID列分布极度的不均匀,非常倾斜
1 row updated.
SQL> commit;
Commit complete.
SQL> create index ind_tt on tt(id);
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'tt',estimate_percent=>100,method_opt=>'for all columns size skewonly')
PL/SQL procedure successfully completed.
SQL> select endpoint_value,endpoint_number from user_tab_histograms where table_name='TT' and column_name='ID' order by endpoint_number;
ENDPOINT_VALUE ENDPOINT_NUMBER
-------------- ---------------
1 40944
99 40945 --两个bucket
select * from user_tab_columns where table_name='TT' --ID频度直方图,object_name等高直方图
SQL> conn hr/hr
Connected.
SQL> explain plan set statement_id '1' for select * from tt where id=1;
Explained.
SQL> explain plan set statement_id '99' for select * from tt where id=99;
Explained.
SQL> select statement_id,cardinality from plan_table where id=0 order by statement_id;
STATEMENT_ID CARDINALITY
------------------------------ -----------
1 40944
99 1
SQL> select * from tt where id=99;
Execution Plan
----------------------------------------------------------
Plan hash value: 3656862534
--------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 28 | 2 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| TT | 1 | 28 | 2 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IND_TT | 1 | | 1 (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("ID"=99)
SQL> select * from tt where id=1;
40944 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 264906180
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 40944 | 1119K| 47 (3)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| TT | 40944 | 1119K| 47 (3)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("ID"=1)
注意:如果没有生成直方图信息的话
SQL> exec dbms_stats.gather_table_stats(user,'tt',estimate_percent=>100,method_opt=>'for all columns size 1');
PL/SQL procedure successfully completed.
select * from user_tab_columns where table_name='TT' --确认是NONE
SQL> select endpoint_value,endpoint_number from user_tab_histograms where table_name='TT' and column_name='ID' order by endpoint_number;
ENDPOINT_VALUE ENDPOINT_NUMBER
-------------- ---------------
1 0
99 1
SQL> conn hr/hr
Connected.
SQL> explain plan set statement_id '1' for select * from tt where id=1;
Explained.
SQL> explain plan set statement_id '99' for select * from tt where id=99;
Explained.
SQL> select statement_id,cardinality from plan_table where id=0 order by statement_id;
STATEMENT_ID CARDINALITY
------------------------------ -----------
1 20473
99 20473
SQL> select * from tt where id=99;
Execution Plan
----------------------------------------------------------
Plan hash value: 264906180
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 20473 | 559K| 47 (3)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| TT | 20473 | 559K| 47 (3)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("ID"=99)
SQL> select * from tt where id=1;
40944 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 264906180
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 20473 | 559K| 47 (3)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| TT | 20473 | 559K| 47 (3)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("ID"=1)
错误的CAR,错误的PLAN。
总结:当列值分布不均匀的时候,收集直方图以AUTO方式可能不会生成直方图,此时执行计划是不正确的,
此时需要认为干预。
A 等高直方图,会引起执行计划的误差,误差随着bucket的增加而减少,随着数据量增加而增加。
B 如果直方图类型为NONE,此时,对于分布不均匀的列,会发生plan的严重错误
C 最佳的情况是用频度直方图,但是count(distinct id)>254了,没办法作频度直方图,只能作等高直方图,
将bucket数量加到254,把误差减到最小!
D 收集直方图的动作封装在dbms_stats包中的,但是封装以后不是很精确了,因为策略都是一样的,
所以留出了method_opt参数让我们调整。
E 收集直方图信息,对于数据量大的表是有性能损耗的,建议仅对有索引的列收集信息:
FOR ALL INDEXED COLUMNS [size_clause],根据实际需求SQL来确定哪些列作直方图。
最主要关注列值分布不均匀的列!!!
------------------------企业数据库中作性能数据收集策略的方法(思路)-------------
1、作配置表——p_config
owner table_name estimate_percent cascade method_opt granularity analyze_time
------------------------------------------------------------------------------------------------
SCOTT TT 100 true for all columns size skewonly GLOBAL AND PARTITION
HR T1 30 true for INDEXED columns size skewonly GLOBAL
。。。。。
-------------------------------------------------------------------
2、创建SP
for idx in (select * from p_config) loop
dbms_stats.gather_table_stats(idx.owner,idx.table_name,estimate_percent=>idx.estimate_percent,cascade=>idx.cascade,method_opt=>idx.method_opt...);
end loop;
3、将SP封装在后台JOB中,定时运行收集性能数据。
------------------------------------------------------------------------
表和索引的统计信息在,列的信息没有了,plan发生怎样的变化?
1、将列的信息删除
SQL> exec dbms_stats.delete_column_stats(user,'tt','id');
PL/SQL procedure successfully completed.
select * from user_tab_col_statistics where table_name='TT' and column_name='ID' --查不到数据
确认表和索引的信息还在:
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TT';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
40945 28 202 2013-08-08 14:49:51
SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TT';
BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED
---------- ----------- ------------- -------------------
1 80 2 2013-08-08 14:49:51
SQL> select * from tt where id=1;
40944 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 3656862534
--------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 409 | 11452 | 41 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| TT | 409 | 11452 | 41 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IND_TT | 164 | | 40 (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("ID"=1)
SQL> select count(1) from tt where id=1;
COUNT(1)
----------
40944
SQL> select * from tt where id=99; --当前的级别下(级别2),列的信息缺失不会作动态采样,当级别为3的时候会采样。
Execution Plan
----------------------------------------------------------
Plan hash value: 3656862534
--------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 409 | 11452 | 41 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| TT | 409 | 11452 | 41 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IND_TT | 164 | | 40 (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("ID"=99)
SQL> select count(1) from tt where id=99;
COUNT(1)
----------
1
虽然表和索引的统计信息还在,但是列的信息没了,直方图更加没有了。CBO无法知道列值的分布情况的,没有办法给出正确的执行计划。
将采样级别改到3:
SQL> show parameter dynamic
NAME TYPE VALUE
------------------------------------ ----------- ------------------------------
optimizer_dynamic_sampling integer 2
SQL> alter system set optimizer_dynamic_sampling=3 scope=both;
System altered.
SQL> show parameter dynamic
NAME TYPE VALUE
------------------------------------ ----------- ------------------------------
optimizer_dynamic_sampling integer 3
SQL> select * from tt where id=99;
Execution Plan
----------------------------------------------------------
Plan hash value: 3656862534
--------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 7 | 196 | 2 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| TT | 7 | 196 | 2 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IND_TT | 7 | | 1 (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("ID"=99)
Note
-----
- dynamic sampling used for this statement
SQL> select * from tt where id=1;
40944 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 264906180
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 40938 | 1119K| 47 (3)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| TT | 40938 | 1119K| 47 (3)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("ID"=1)
Note
-----
- dynamic sampling used for this statement
结论:
采样级别>=3,列的信息缺失会作动态采样;
采样级别<=2,列的信息缺失不会动态采样。
------------------------聚簇因子--------------------------------------
聚簇因子是索引的一个属性。
select index_name,clustering_factor from user_indexes --索引的聚簇因子
SQL> select index_name,clustering_factor from user_indexes where index_name='T6_IND';
INDEX_NAME CLUSTERING_FACTOR
------------------------------ -----------------
T6_IND 376
定义——表示索引中的键值和源表上数据分布的一种关系,当索引键值和表中的数据以及和占用的数据块数量大致相同的时候,意味着索引键值指向数据块越集中,这个因子越小,越有利于索引的使用,相反,当索引键值指向的数据块越多,数据的排列和索引相差很大,这个因子也就越大。
这个指标对执行计划影响是很大的,比如执行计划异常的时候,但是从card上无法选择正确,经常考虑聚簇因子有问题或者直方图对应的列分布不均匀。
聚簇因子是怎样算出来的,索引是创建在列上的,聚簇因子是针对索引的,也是针对列的,所以聚簇因子也是列的属性。
聚簇因子就是列的索引对应的数据行扫描的时候所扫的数据块次。
create or replace function clustering_factor(p_owner in varchar2,p_table_name in varchar2,p_column_name in varchar2) return number is
l_cursor SYS_REFCURSOR;
l_clustering_factor integer:=0;
l_block_nr integer:=0;
l_file_nr integer:=0;
l_previous_block_nr integer:=0;
l_previous_file_nr integer:=0;
begin
open l_cursor for 'select dbms_rowid.rowid_block_number(rowid) block_nr,'||' dbms_rowid.rowid_to_absolute_fno(rowid,'''||p_owner||''','''||p_table_name||''') file_nr '||' from '||p_owner||'.'||p_table_name||' '||' where '||p_column_name||' is not null '||'order by '||p_column_name;
loop
fetch l_cursor into l_block_nr,l_file_nr;
exit when (l_cursor%notfound);
if l_block_nr<>l_previous_block_nr or l_file_nr<>l_previous_file_nr then
l_clustering_factor:=l_clustering_factor+1;
else
null;
end if;
l_previous_block_nr:=l_block_nr;
l_previous_file_nr:=l_file_nr;
end loop;
close l_cursor;
return l_clustering_factor;
end;
验证:
SQL> select index_name,clustering_factor from user_indexes where index_name='T6_IND';
INDEX_NAME CLUSTERING_FACTOR
------------------------------ -----------------
T6_IND 376
select * from user_ind_columns --查询索引、表、列的对应关系
SQL> select clustering_factor('HR','T6','OBJECT_ID') from dual;
CLUSTERING_FACTOR('HR','T6','OBJECT_ID')
----------------------------------------
376
SQL> select index_name,clustering_factor from user_indexes where index_name='T3_IND';
INDEX_NAME CLUSTERING_FACTOR
------------------------------ -----------------
T3_IND 353
SQL> select clustering_factor('HR','T5','OBJECT_ID') from dual;
CLUSTERING_FACTOR('HR','T5','OBJECT_ID')
----------------------------------------
353
案例:聚簇因子是怎样影响执行计划的。
SQL> drop table t;
Table dropped.
SQL> create table t as select object_id,object_name from all_objects;
Table created.
SQL> create index t_ind on t(object_id);
Index created.
SQL> drop table t1;
Table dropped.
SQL> create table t1 as select * from t where rownum=1;
Table created.
SQL> alter table t1 minimize records_per_block; --将表的属性改为在块中尽量存储最少的行。
Table altered.
SQL> insert into t1 select * from t;
40944 rows created.
SQL> commit;
Commit complete.
SQL> create index t1_ind on t1(object_id);
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'t',cascade=>true);
PL/SQL procedure successfully completed.
SQL> exec dbms_stats.gather_table_stats(user,'t1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select table_name,num_rows,blocks from user_tables where table_name in('T','T1');
TABLE_NAME NUM_ROWS BLOCKS
------------------------------ ---------- ----------
T 40944 213
T1 40945 21320
SQL> select table_name,index_name,num_rows,leaf_blocks,clustering_factor from user_indexes where table_name in('T','T1');
TABLE_NAME INDEX_NAME NUM_ROWS LEAF_BLOCKS CLUSTERING_FACTOR
------------------------------ ------------------------------ ---------- ----------- -----------------
T T_IND 40944 91 353
T1 T1_IND 40945 91 33469
结论:数据分布在数据块上越多,索引聚合因子也就越大。
SQL> select * from t where object_id<1000;
64 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 1376202287
-------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 540 | 16200 | 8 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| T | 540 | 16200 | 8 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | T_IND | 540 | | 3 (0)| 00:00:01 |
-------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("OBJECT_ID"<1000)
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
13 consistent gets
0 physical reads
0 redo size
2603 bytes sent via SQL*Net to client
444 bytes received via SQL*Net from client
6 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
64 rows processed
SQL> select * from t1 where object_id<1000;
65 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 2059591622
--------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 540 | 16200 | 445 (0)| 00:00:06 |
| 1 | TABLE ACCESS BY INDEX ROWID| T1 | 540 | 16200 | 445 (0)| 00:00:06 |
|* 2 | INDEX RANGE SCAN | T1_IND | 540 | | 3 (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("OBJECT_ID"<1000)
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
42 consistent gets
0 physical reads
0 redo size
2608 bytes sent via SQL*Net to client
444 bytes received via SQL*Net from client
6 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
65 rows processed
结论:执行路径是一样的,聚合因子高的索引产生了更多的一致性读,COST也更大。
如果不用rowid来扫表,此时和聚簇因子没有关系了。
SQL> select object_id from t where object_id<1000;
64 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 422821423
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 540 | 2700 | 3 (0)| 00:00:01 |
|* 1 | INDEX RANGE SCAN| T_IND | 540 | 2700 | 3 (0)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("OBJECT_ID"<1000)
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
7 consistent gets
0 physical reads
0 redo size
1339 bytes sent via SQL*Net to client
444 bytes received via SQL*Net from client
6 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
64 rows processed
SQL> select object_id from t1 where object_id<1000;
65 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 2474755989
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 540 | 2700 | 3 (0)| 00:00:01 |
|* 1 | INDEX RANGE SCAN| T1_IND | 540 | 2700 | 3 (0)| 00:00:01 |
---------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("OBJECT_ID"<1000)
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
7 consistent gets
0 physical reads
0 redo size
1340 bytes sent via SQL*Net to client
444 bytes received via SQL*Net from client
6 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
65 rows processed
块中的行密度越低,越容易用索引,但是性能不一定最高。
----表列值的存储排列顺序和索引列值的排列顺序一致和非一致对聚簇因子和执行计划的影响-----------
SQL> conn hr/hr
Connected.
SQL> create table t_colocated(id number,col2 varchar2(100));
Table created.
SQL> begin
2 for i in 1..100000 loop
3 insert into t_colocated values(i,rpad(dbms_random.random,95,'*')); --ID列存储的时候是排序的
4 end loop;
5 end;
6 /
PL/SQL procedure successfully completed.
SQL> commit;
Commit complete.
SQL> alter table t_colocated add constraint pk_t_colocated primary key(id);
Table altered.
SQL> create table t_disorganized as select id,col2 from t_colocated order by col2; --ID列是散乱的。
Table created. --克隆表的方式在表中t_disorganized装入数据
SQL> alter table t_disorganized add constraint pk_t_disorganized primary key(id);
Table altered.
SQL> select table_name,index_name,num_rows,leaf_blocks,clustering_factor from user_indexes where index_name in(upper('pk_t_colocated'),upper('pk_t_disorganized'));
TABLE_NAME INDEX_NAME NUM_ROWS LEAF_BLOCKS CLUSTERING_FACTOR
------------------------------ ------------------------------ ---------- ----------- -----------------
T_COLOCATED PK_T_COLOCATED 100000 208 1469
T_DISORGANIZED PK_T_DISORGANIZED 100000 208 99935
SQL> select clustering_factor('HR',upper('t_disorganized'),'ID') from dual;
CLUSTERING_FACTOR('HR',UPPER('T_DISORGANIZED'),'ID')
----------------------------------------------------
99935
SQL> select clustering_factor('HR',upper('t_colocated'),'ID') from dual;
CLUSTERING_FACTOR('HR',UPPER('T_COLOCATED'),'ID')
-------------------------------------------------
1469
ID列次序乱了,查询的数据的块不是依次顺序的,聚簇因子也会发生变化,对执行计划的影响。
SQL> set autotrace trace exp
SQL> set linesize 1000
SQL> select * from t_disorganized where id<100;
Execution Plan
----------------------------------------------------------
Plan hash value: 290015569
-------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 99 | 6435 | 127 (0)| 00:00:02 |
| 1 | TABLE ACCESS BY INDEX ROWID| T_DISORGANIZED | 99 | 6435 | 127 (0)| 00:00:02 |
|* 2 | INDEX RANGE SCAN | PK_T_DISORGANIZED | 99 | | 2 (0)| 00:00:01 |
-------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("ID"<100)
Note
-----
- dynamic sampling used for this statement
SQL> select * from t_colocated where id<100;
Execution Plan
----------------------------------------------------------
Plan hash value: 4204525375
----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 99 | 6435 | 4 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| T_COLOCATED | 99 | 6435 | 4 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | PK_T_COLOCATED | 99 | | 2 (0)| 00:00:01 |
----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("ID"<100)
Note
-----
- dynamic sampling used for this statement
-------------------------dbms_stats包的详细用法------------------------------------
DBMS_STATS.GATHER_TABLE_STATS (
ownname VARCHAR2,
tabname VARCHAR2,
partname VARCHAR2 DEFAULT NULL,
estimate_percent NUMBER DEFAULT to_estimate_percent_type
(get_param('ESTIMATE_PERCENT')),
block_sample BOOLEAN DEFAULT FALSE,
method_opt VARCHAR2 DEFAULT get_param('METHOD_OPT'),
degree NUMBER DEFAULT to_degree_type(get_param('DEGREE')),
granularity VARCHAR2 DEFAULT GET_PARAM('GRANULARITY'),
cascade BOOLEAN DEFAULT to_cascade_type(get_param('CASCADE')),
stattab VARCHAR2 DEFAULT NULL,
statid VARCHAR2 DEFAULT NULL,
statown VARCHAR2 DEFAULT NULL,
no_invalidate BOOLEAN DEFAULT to_no_invalidate_type (
get_param('NO_INVALIDATE')),
force BOOLEAN DEFAULT FALSE);
degree——分析的并行度
并行——几个进程来一起完成这个任务。
SQL> exec dbms_stats.gather_table_stats(user,'t_disorganized',cascade=>true,degree=>5); --5个子进程一起完成这个分析
PL/SQL procedure successfully completed.
对象本身有并行度:
SQL> select table_name,degree from user_tables;
TABLE_NAME DEGREE
------------------------------ --------------------
XX2 1
TX 1
T_COLOCATED 1
T2 1
T3 1
T4 1
T5 1
T6 1
EMP 1
T7 1
T8 1
TEST1 1
TEST 1
T_DISORGANIZED 1
T1 1
TT 1
T 1
COUNTRIES 1
JOBS 1
LOCATIONS 1
REGIONS 1
JOB_HISTORY 1
EMPLOYEES 1
DEPARTMENTS 1
24 rows selected.
默认的在表上的并行度都是1,也就是不并行。所以在分析的时候要指定degree参数。
SQL> alter table t_disorganized parallel 5;
Table altered.
SQL> select table_name,degree from user_tables where table_name='T_DISORGANIZED';
TABLE_NAME DEGREE
------------------------------ --------------------
T_DISORGANIZED 5
SQL> exec dbms_stats.gather_table_stats(user,'t_disorganized',cascade=>true);
PL/SQL procedure successfully completed. --此时不用指定了,默认是按照5的并行度来执行的。
granularity——参数
'ALL' - 对表的全局作分析,包括分区和子分区。
'AUTO'- 缺省,oracle自动根据分区的类型来决定用哪种粒度来分析
'DEFAULT' - 10g中这个参数是废弃的。
'GLOBAL' - 分析表的全局,不包括分区和子分区。
'GLOBAL AND PARTITION' - 对全局分析和分区分析,不分析子分区
'PARTITION '- 仅仅分析分区
'SUBPARTITION' - 仅仅分析子分区
案例:
SQL> create table ttx(id int) partition by range(id)
2 (partition p1 values less than(5),
3 partition p2 values less than(10),
4 partition p3 values less than(15)
5 );
Table created.
SQL> select segment_name,partition_name,segment_type from user_segments where segment_name='TTX';
SEGMENT_NAME PARTITION_NAME SEGMENT_TYPE
--------------------------------------------------------------------------------- ------------------------------ ------
TTX P1 TABLE PARTITION
TTX P2 TABLE PARTITION
TTX P3 TABLE PARTITION
物理上这个表有三个segment,根据ID的值来选择存放的段。
数据的存储:
SQL> insert into ttx values(1);
1 row created.
SQL> insert into ttx values(6);
1 row created.
SQL> insert into ttx values(11);
1 row created.
SQL> commit;
Commit complete.
SQL> select * from ttx;
ID
----------
1
6
11
SQL> select * from ttx partition(p1);
ID
----------
1
SQL> select * from ttx partition(p2);
ID
----------
6
SQL> select * from ttx partition(p3);
ID
----------
11
索引在分区表上如何存储的。
SQL> select segment_name,partition_name,segment_type from user_segments where segment_name='IND_TTX'; --全局索引
SEGMENT_NAME PARTITION_NAME SEGMENT_TYPE
--------------------------------------------------------------------------------- --------------------------
IND_TTX INDEX
SQL> drop index ind_ttx;
Index dropped.
SQL> create index ind_ttx on ttx(id) local;
Index created.
SQL> select segment_name,partition_name,segment_type from user_segments where segment_name='IND_TTX';
SEGMENT_NAME PARTITION_NAME SEGMENT_TYPE
--------------------------------------------------------------------------------- ----------------------------
IND_TTX P1 INDEX PARTITION
IND_TTX P2 INDEX PARTITION
IND_TTX P3 INDEX PARTITION
分区索引(本地索引),根据表的分区情况将索引分区,3个segment
分析:
SQL> exec dbms_stats.gather_table_stats(user,'ttx',cascade=>true);
PL/SQL procedure successfully completed.
确认表和分区的统计信息都存在:
表的全局统计信息:
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
3 3 15 2013-08-09 10:07:59
表的分区统计信息:
SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';
PARTITION_NAME NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
------------------------------ ---------- ----------- ---------- -------------------
P1 1 3 5 2013-08-09 10:07:59
P2 1 3 5 2013-08-09 10:07:59
P3 1 3 5 2013-08-09 10:07:59
每个分区有1行数据,每个分区段都有一个extent,每个extent有8个数据块:
select * from user_extents where partition_name='P1'
但是为什么统计显示是5个数据块?前三个块是位图。
确认下索引和索引分区的统计信息是存在的。
SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TTX';
BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED
---------- ----------- ------------- -------------------
0 3 3 2013-08-09 10:07:59
SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';
PARTITION_NAME NUM_ROWS BLEVEL LEAF_BLOCKS LAST_ANALYZED
------------------------------ ---------- ---------- ----------- -------------------
P1 1 0 1 2013-08-09 10:07:59
P2 1 0 1 2013-08-09 10:07:59
P3 1 0 1 2013-08-09 10:07:59
给表添加一个分区
SQL> alter table ttx add partition pm values less than(maxvalue);
Table altered.
给新的分区添加数据
SQL> begin
2 for i in 1..10000 loop
3 insert into ttx values(16);
4 end loop;
5 end;
6 /
PL/SQL procedure successfully completed.
SQL> commit;
Commit complete.
SQL> select count(1) from ttx partition(pm);
COUNT(1)
----------
10000
看分区索引的统计信息:
SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';
PARTITION_NAME NUM_ROWS BLEVEL LEAF_BLOCKS LAST_ANALYZED
------------------------------ ---------- ---------- ----------- -------------------
P1 1 0 1 2013-08-09 10:07:59
P2 1 0 1 2013-08-09 10:07:59
P3 1 0 1 2013-08-09 10:07:59
PM --新增的分区是没有统计信息的
看表的全局信息
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
3 3 15 2013-08-09 10:07:59 --表的全局信息没有变化
执行计划的影响:
SQL> select * from ttx where id=16;
Execution Plan
----------------------------------------------------------
Plan hash value: 3737425109
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
--------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 1 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE SINGLE| | 1 | 3 | 1 (0)| 00:00:01 | 4 | 4 |
|* 2 | INDEX RANGE SCAN | IND_TTX | 1 | 3 | 1 (0)| 00:00:01 | 4 | 4 |
--------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("ID"=16)
Pstart——查询扫描读取分区的起始分区号
Pstop——查询扫描读取分区的结束分区号
ROWS的估计值是严重失误的,没有分析新增加的分区,导致CBO作出了错误的判断。
A 只对分区作分析,不对全局作分析
SQL> exec dbms_stats.gather_table_stats(user,'ttx',partname=>'pm',estimate_percent=>100,granularity=>'partition');
PL/SQL procedure successfully completed.
查看分区的信息:
SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';
PARTITION_NAME NUM_ROWS BLEVEL LEAF_BLOCKS LAST_ANALYZED
------------------------------ ---------- ---------- ----------- -------------------
P1 1 0 1 2013-08-09 10:07:59
P2 1 0 1 2013-08-09 10:07:59
P3 1 0 1 2013-08-09 10:07:59
PM 10000 1 27 2013-08-09 10:23:32
SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';
PARTITION_NAME NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
------------------------------ ---------- ----------- ---------- -------------------
P1 1 3 5 2013-08-09 10:07:59
P2 1 3 5 2013-08-09 10:07:59
P3 1 3 5 2013-08-09 10:07:59
PM 10000 3 20 2013-08-09 10:23:32
确认全局信息没有分析:
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
3 3 15 2013-08-09 10:07:59
SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TTX';
BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED
---------- ----------- ------------- -------------------
0 3 3 2013-08-09 10:07:59
SQL> select * from ttx where id=16; --此时执行计划是正确的
Execution Plan
----------------------------------------------------------
Plan hash value: 701592076
-----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
-----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 9999 | 29997 | 6 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE SINGLE| | 9999 | 29997 | 6 (0)| 00:00:01 | 4 | 4 |
|* 2 | TABLE ACCESS FULL | TTX | 9999 | 29997 | 6 (0)| 00:00:01 | 4 | 4 |
-----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("ID"=16)
没有对全局分析执行计划计划在什么情况下会异常?
对全局数据作出变化:
SQL> alter table ttx add object_name varchar2(20);
Table altered.
SQL> update ttx set object_name='AAAA';
10003 rows updated.
SQL> commit;
Commit complete.
SQL> create index ind2_ttx on ttx(object_name);
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'ttx',partname=>'pm',estimate_percent=>100,granularity=>'partition');
PL/SQL procedure successfully completed. --只分析分区不分析全局
SQL> select * from ttx where id=16; --查询ID列是没有问题的
Execution Plan
----------------------------------------------------------
Plan hash value: 701592076
-----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
-----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 9999 | 7××× | 11 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE SINGLE| | 9999 | 7××× | 11 (0)| 00:00:01 | 4 | 4 |
|* 2 | TABLE ACCESS FULL | TTX | 9999 | 7××× | 11 (0)| 00:00:01 | 4 | 4 |
-----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("ID"=16)
SQL> select * from ttx where object_name='AAAA'; --查询name列错误了。
Execution Plan
----------------------------------------------------------
Plan hash value: 1144724227
--------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
--------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 3 | 9 | 5 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE ALL| | 3 | 9 | 5 (0)| 00:00:01 | 1 | 4 | --表示扫描分区1~4
|* 2 | TABLE ACCESS FULL | TTX | 3 | 9 | 5 (0)| 00:00:01 | 1 | 4 |
--------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OBJECT_NAME"='AAAA')
Note
-----
- dynamic sampling used for this statement
只对分区分析不对全局分析,查询只是涉及分区的时候,执行计划是没有问题的。查询涉及全局的时候,执行计划是错误的。
SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'global'); --全局分析
PL/SQL procedure successfully completed.
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
10003 8 58 2013-08-09 10:31:14
SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TTX';
BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED
---------- ----------- ------------- -------------------
1 30 5 2013-08-09 10:31:14
1 28 1 2013-08-09 10:31:14
SQL> select * from ttx where object_name='AAAA';
Execution Plan
----------------------------------------------------------
Plan hash value: 1144724227
--------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
--------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 10003 | 80024 | 14 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE ALL| | 10003 | 80024 | 14 (0)| 00:00:01 | 1 | 4 |
|* 2 | TABLE ACCESS FULL | TTX | 10003 | 80024 | 14 (0)| 00:00:01 | 1 | 4 |
--------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OBJECT_NAME"='AAAA')
总结:即使分区级别有信息了,但是没有对全局分析,当查询涉及到全局数据,而全局数据在上一次分析之后有数据或者结构的变化的时候,依然导致错误的执行计划。
B 只对全局作分析,不对分区作分析
删除最后一个分区的数据
SQL> delete from ttx where id>15;
10000 rows deleted.
SQL> commit;
Commit complete.
先对分区和全局都分析
SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,cascade=>true);
PL/SQL procedure successfully completed.
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
3 8 58 2013-08-09 10:35:43
SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';
PARTITION_NAME NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
------------------------------ ---------- ----------- ---------- -------------------
P1 1 8 5 2013-08-09 10:35:43
P2 1 8 5 2013-08-09 10:35:43
P3 1 8 5 2013-08-09 10:35:43
PM 0 0 43 2013-08-09 10:35:43
往第四个分区中加数据
SQL> begin
2 for i in 1..10000 loop
3 insert into ttx values(16,'');
4 end loop;
5 end;
6 /
PL/SQL procedure successfully completed.
SQL> commit;
Commit complete.
让数据严重倾斜
SQL> update ttx set id=1000 where id=16 and rownum=1;
1 row updated.
SQL> commit;
Commit complete.
只对全局作分析不对分区作分析,且不做直方图分析
SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'global',method_opt=>'for all columns size 1',cascade=>true);
PL/SQL procedure successfully completed.
确认全局信息是新的,分区没有分析:
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
10003 4 58 2013-08-09 10:41:08
SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TTX';
BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED
---------- ----------- ------------- -------------------
1 31 5 2013-08-09 10:41:08
1 1 1 2013-08-09 10:41:08
SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';
PARTITION_NAME NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
------------------------------ ---------- ----------- ---------- -------------------
P1 1 8 5 2013-08-09 10:35:43
P2 1 8 5 2013-08-09 10:35:43
P3 1 8 5 2013-08-09 10:35:43
PM 0 0 43 2013-08-09 10:35:43
SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';
PARTITION_NAME NUM_ROWS BLEVEL LEAF_BLOCKS LAST_ANALYZED
------------------------------ ---------- ---------- ----------- -------------------
P1 1 0 1 2013-08-09 10:35:43
P2 1 0 1 2013-08-09 10:35:43
P3 1 0 1 2013-08-09 10:35:43
PM 0 1 0 2013-08-09 10:35:43
SQL> select * from ttx where id=16;
Execution Plan
----------------------------------------------------------
Plan hash value: 2572159449
--------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
--------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 25 | 1 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE SINGLE | | 1 | 25 | 1 (0)| 00:00:01 | 4 | 4 |
| 2 | TABLE ACCESS BY LOCAL INDEX ROWID| TTX | 1 | 25 | 1 (0)| 00:00:01 | 4 | 4 |
|* 3 | INDEX RANGE SCAN | IND_TTX | 1 | | 1 (0)| 00:00:01 | 4 | 4 |
--------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("ID"=16)
加上直方图信息。
SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'global',method_opt=>'for all columns size skewonly',cascade=>true);
PL/SQL procedure successfully completed.
select * from user_tab_col_statistics where table_name='TTX' --确认有直方图信息了。
SQL> select * from ttx where id=16;
Execution Plan
----------------------------------------------------------
Plan hash value: 2572159449
--------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
--------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 25 | 1 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE SINGLE | | 1 | 25 | 1 (0)| 00:00:01 | 4 | 4 |
| 2 | TABLE ACCESS BY LOCAL INDEX ROWID| TTX | 1 | 25 | 1 (0)| 00:00:01 | 4 | 4 |
|* 3 | INDEX RANGE SCAN | IND_TTX | 1 | | 1 (0)| 00:00:01 | 4 | 4 |
--------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("ID"=16)
执行计划还是错误的,说明主要不是直方图引起的。主要还是因为没有收集分区的信息导致的。
SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'partition',method_opt=>'for all columns size 1',cascade=>true);
PL/SQL procedure successfully completed. 分析分区但是不收集直方图。
SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';
PARTITION_NAME NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
------------------------------ ---------- ----------- ---------- -------------------
P1 1 8 5 2013-08-09 10:46:33
P2 1 8 5 2013-08-09 10:46:33
P3 1 8 5 2013-08-09 10:46:33
PM 10000 3 43 2013-08-09 10:46:33
SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';
PARTITION_NAME NUM_ROWS BLEVEL LEAF_BLOCKS LAST_ANALYZED
------------------------------ ---------- ---------- ----------- -------------------
P1 1 0 1 2013-08-09 10:46:33
P2 1 0 1 2013-08-09 10:46:33
P3 1 0 1 2013-08-09 10:46:33
PM 10000 1 28 2013-08-09 10:46:33
SQL> select * from ttx where id=16;
Execution Plan
----------------------------------------------------------
Plan hash value: 701592076
-----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
-----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 5000 | 15000 | 11 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE SINGLE| | 5000 | 15000 | 11 (0)| 00:00:01 | 4 | 4 |
|* 2 | TABLE ACCESS FULL | TTX | 5000 | 15000 | 11 (0)| 00:00:01 | 4 | 4 |
-----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("ID"=16)
bucket只有1个,分区中,最大值是1000,最小值是16,共10000行,按照均匀分布,各5000行。
SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'partition',method_opt=>'for all columns size skewonly',cascade=>true); --重新收集直方图信息
PL/SQL procedure successfully completed.
SQL> select * from ttx where id=16;
Execution Plan
----------------------------------------------------------
Plan hash value: 701592076
-----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
-----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 9999 | 29997 | 11 (0)| 00:00:01 | | |
| 1 | PARTITION RANGE SINGLE| | 9999 | 29997 | 11 (0)| 00:00:01 | 4 | 4 |
|* 2 | TABLE ACCESS FULL | TTX | 9999 | 29997 | 11 (0)| 00:00:01 | 4 | 4 |
-----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("ID"=16)
必须对全局和分析都分析才能得到精确的执行计划,但是上面的例子是很极端的例子,
将所有可能出现的问题都放大了,优化效果很明显。
但是实际的环境中,系统的繁忙程度决定了我们可能不允许时时对大表作全局分析,采取策略?
建议的策略:
1、所有的SQL语句是不是仅仅在分区完成的,如果是,那么没有必要去分析全局,只分析新增的分区即可;
如果有些SQL是跨分区查询的,考虑新增分区的数据是不是占比很大,如果仅仅很少比列的数据,
可以考虑不进行全局分析,如果薪增的分区占比很大,就需要全局分析+分区分析。
2、如果企业的数据库本身容量就不大,全局分析+分区分析也未尝不可。
3、如果列值分布均匀,考虑将直方图分析取消掉,减小全局分析的压力。
------------------------------------------------------------------------
dbms_stats中保存分析数据的方法:
A 创建一个保存性能数据的表
SQL> exec dbms_stats.create_stat_table(user,'stat_tab','mytbs3');
PL/SQL procedure successfully completed.
SQL> set linesize 100
SQL> desc stat_tab --这个表用来转存性能数据的,不需要关注字段的含义
Name Null? Type
----------------------------------------------------- -------- ------------------------------------
STATID VARCHAR2(30)
TYPE CHAR(1)
VERSION NUMBER
FLAGS NUMBER
C1 VARCHAR2(30)
C2 VARCHAR2(30)
C3 VARCHAR2(30)
C4 VARCHAR2(30)
C5 VARCHAR2(30)
N1 NUMBER
N2 NUMBER
N3 NUMBER
N4 NUMBER
N5 NUMBER
N6 NUMBER
N7 NUMBER
N8 NUMBER
N9 NUMBER
N10 NUMBER
N11 NUMBER
N12 NUMBER
D1 DATE
R1 RAW(32)
R2 RAW(32)
CH1 VARCHAR2(1000)
B 删除这个表的话。
SQL> exec dbms_stats.drop_stat_table(user,'stat_tab');
PL/SQL procedure successfully completed.
SQL> desc stat_tab
ERROR:
ORA-04043: object stat_tab does not exist
SQL> exec dbms_stats.create_stat_table(user,'stat_tab','mytbs3');
PL/SQL procedure successfully completed.
C 保存数据——收集性能数据的时候同时将性能数据保存在这个表中。
场景A:收集性能数据的时候转存性能数据。
SQL> exec dbms_stats.gather_table_stats(user,'ttx',stattab=>'stat_tab',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select count(1) from stat_tab;
COUNT(1)
----------
26
场景B:如果在数据字典表已经有了性能数据,现在不能再去收集,可以将已经有的性能数据导入到这个表中,表已经建好。
SQL> truncate table stat_tab;
Table truncated.
SQL> exec dbms_stats.export_table_stats(user,'ttx',stattab=>'stat_tab',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select count(1) from stat_tab;
COUNT(1)
----------
26
一个用户下,stattab只要一个就够了,所有表、索引、列的性能数据都能存储在这个表中。
场景C:误删除了TTX表的性能数据,用这个表的数据来恢复
SQL> exec dbms_stats.delete_table_stats(user,'ttx');
PL/SQL procedure successfully completed.
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
将性能数据恢复
SQL> exec dbms_stats.import_table_stats(user,'ttx',stattab=>'stat_tab',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';
NUM_ROWS AVG_ROW_LEN BLOCKS LAST_ANALYZED
---------- ----------- ---------- -------------------
10003 4 58 2013-08-09 11:25:43
数据迁移的时候和数据切割的时候,性能数据的处理。
--------------------------------
锁定分析数据
当一个执行计划被调试稳定之后,希望性能数据能保存下来,不被其他用户修改,可以将性能数据锁定。
SQL> exec dbms_stats.lock_table_stats(user,'ttx');
PL/SQL procedure successfully completed.
再想分析的话就会报错的。
SQL> exec dbms_stats.gather_table_stats(user,'ttx',cascade=>true);
BEGIN dbms_stats.gather_table_stats(user,'ttx',cascade=>true); END;
*
ERROR at line 1:
ORA-20005: object statistics are locked (stattype = ALL)
ORA-06512: at "SYS.DBMS_STATS", line 13437
ORA-06512: at "SYS.DBMS_STATS", line 13457
ORA-06512: at line 1
解决办法:
1、解锁
SQL> exec dbms_stats.unlock_table_stats(user,'ttx');
PL/SQL procedure successfully completed.
SQL> exec dbms_stats.gather_table_stats(user,'ttx',cascade=>true);
PL/SQL procedure successfully completed.
2、强制覆盖
SQL> exec dbms_stats.gather_table_stats(user,'ttx',cascade=>true,force=>true);
PL/SQL procedure successfully completed.
----------------------------------------------
设置性能数据
使用场合:
1、当相应的性能数据指标不正确的时候,导致执行计划失败,或者当前不允许收集分析信息,可以手工设置性能数据。
2、在测试环境中或者开发环境中,调试执行计划的时候,可以通过此方法模拟生产的性能数据。
目的:作执行计划的调试。
设置表的性能数据
DBMS_STATS.SET_TABLE_STATS (
ownname VARCHAR2,
tabname VARCHAR2,
partname VARCHAR2 DEFAULT NULL,
stattab VARCHAR2 DEFAULT NULL,
statid VARCHAR2 DEFAULT NULL,
numrows NUMBER DEFAULT NULL,
numblks NUMBER DEFAULT NULL,
avgrlen NUMBER DEFAULT NULL,
flags NUMBER DEFAULT NULL,
statown VARCHAR2 DEFAULT NULL,
no_invalidate BOOLEAN DEFAULT to_no_invalidate_type (
get_param('NO_INVALIDATE')),
cachedblk NUMBER DEFAULT NULL,
cachehit NUMBER DEFUALT NULL,
force BOOLEAN DEFAULT FALSE);
设置列的性能数据
DBMS_STATS.SET_COLUMN_STATS (
ownname VARCHAR2,
tabname VARCHAR2,
colname VARCHAR2,
partname VARCHAR2 DEFAULT NULL,
stattab VARCHAR2 DEFAULT NULL,
statid VARCHAR2 DEFAULT NULL,
distcnt NUMBER DEFAULT NULL,
density NUMBER DEFAULT NULL,
nullcnt NUMBER DEFAULT NULL,
srec StatRec DEFAULT NULL,
avgclen NUMBER DEFAULT NULL,
flags NUMBER DEFAULT NULL,
statown VARCHAR2 DEFAULT NULL,
no_invalidate BOOLEAN DEFAULT to_no_invalidate_type(
get_param('NO_INVALIDATE')),
force BOOLEAN DEFAULT FALSE);
设置索引的性能数据
DBMS_STATS.SET_INDEX_STATS (
ownname VARCHAR2,
indname VARCHAR2,
partname VARCHAR2 DEFAULT NULL,
stattab VARCHAR2 DEFAULT NULL,
statid VARCHAR2 DEFAULT NULL,
numrows NUMBER DEFAULT NULL,
numlblks NUMBER DEFAULT NULL,
numdist NUMBER DEFAULT NULL,
avglblk NUMBER DEFAULT NULL,
avgdblk NUMBER DEFAULT NULL,
clstfct NUMBER DEFAULT NULL,
indlevel NUMBER DEFAULT NULL,
flags NUMBER DEFAULT NULL,
statown VARCHAR2 DEFAULT NULL,
no_invalidate BOOLEAN DEFAULT to_no_invalidate_type(
get_param('NO_INVALIDATE')),
guessq NUMBER DEFAULT NULL,
cachedblk NUMBER DEFAULT NULL,
cachehit NUMBER DEFUALT NULL,
force BOOLEAN DEFAULT FALSE);
SQL> drop table t;
Table dropped.
SQL> create table t as select * from all_objects;
Table created.
SQL> exec dbms_stats.gather_table_stats(user,'t');
PL/SQL procedure successfully completed.
SQL> select * from t; --测试环境
Execution Plan
----------------------------------------------------------
Plan hash value: 1601196873
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 40961 | 3800K| 134 (3)| 00:00:02 |
| 1 | TABLE ACCESS FULL| T | 40961 | 3800K| 134 (3)| 00:00:02 |
--------------------------------------------------------------------------
但是这个表在生产机上有1000万数据。
SQL> exec dbms_stats.set_table_stats(user,'t',numrows=>10000000,numblks=>1000000,avgrlen=>178);
PL/SQL procedure successfully completed.
SQL> select * from t;
Execution Plan
----------------------------------------------------------
Plan hash value: 1601196873
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 10M| 953M| 220K (1)| 00:44:04 |
| 1 | TABLE ACCESS FULL| T | 10M| 953M| 220K (1)| 00:44:04 |
--------------------------------------------------------------------------
-----------------------------------------------
------------------------
表的连接原理
转载于:https://blog.51cto.com/chenhuican/1329174