2013-08-09 Friday


------------ Histograms: how they affect execution plans and what to do about it -----------------


SQL> create table tt as select 1 id,object_name from all_objects;


Table created.


SQL> update tt set id=99 where rownum=1;  -- make the ID column extremely skewed: every row but one has ID=1


1 row updated.


SQL> commit;


Commit complete.


SQL> create index ind_tt on tt(id);


Index created.


SQL> exec dbms_stats.gather_table_stats(user,'tt',estimate_percent=>100,method_opt=>'for all columns size skewonly')


PL/SQL procedure successfully completed.


SQL> select endpoint_value,endpoint_number from user_tab_histograms where table_name='TT' and column_name='ID' order by endpoint_number;


ENDPOINT_VALUE ENDPOINT_NUMBER

-------------- ---------------

             1           40944

            99           40945   -- two buckets


select * from user_tab_columns where table_name='TT'  -- ID gets a FREQUENCY histogram, OBJECT_NAME a HEIGHT BALANCED one


SQL> conn hr/hr

Connected.

SQL> explain plan set statement_id='1' for select * from tt where id=1;


Explained.


SQL> explain plan set statement_id='99' for select * from tt where id=99;


Explained.


SQL> select statement_id,cardinality from plan_table where id=0 order by statement_id;


STATEMENT_ID                   CARDINALITY

------------------------------ -----------

1                                    40944

99                                       1



SQL> select * from tt where id=99;



Execution Plan

----------------------------------------------------------

Plan hash value: 3656862534


--------------------------------------------------------------------------------------

| Id  | Operation                   | Name   | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |        |     1 |    28 |     2   (0)| 00:00:01 |

|   1 |  TABLE ACCESS BY INDEX ROWID| TT     |     1 |    28 |     2   (0)| 00:00:01 |

|*  2 |   INDEX RANGE SCAN          | IND_TT |     1 |       |     1   (0)| 00:00:01 |

--------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("ID"=99)


SQL> select * from tt where id=1;


40944 rows selected.



Execution Plan

----------------------------------------------------------

Plan hash value: 264906180


--------------------------------------------------------------------------

| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------

|   0 | SELECT STATEMENT  |      | 40944 |  1119K|    47   (3)| 00:00:01 |

|*  1 |  TABLE ACCESS FULL| TT   | 40944 |  1119K|    47   (3)| 00:00:01 |

--------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  1 - filter("ID"=1)


Note: now see what happens if no histogram is generated.


SQL> exec dbms_stats.gather_table_stats(user,'tt',estimate_percent=>100,method_opt=>'for all columns size 1');


PL/SQL procedure successfully completed.


select * from user_tab_columns where table_name='TT' -- confirm the HISTOGRAM column now shows NONE


SQL> select endpoint_value,endpoint_number from user_tab_histograms where table_name='TT' and column_name='ID' order by endpoint_number;


ENDPOINT_VALUE ENDPOINT_NUMBER

-------------- ---------------

             1               0

            99               1


SQL> conn hr/hr

Connected.

SQL> explain plan set statement_id='1' for select * from tt where id=1;


Explained.


SQL> explain plan set statement_id='99' for select * from tt where id=99;


Explained.


SQL> select statement_id,cardinality from plan_table where id=0 order by statement_id;


STATEMENT_ID                   CARDINALITY

------------------------------ -----------

1                                    20473

99                                   20473


SQL> select * from tt where id=99;



Execution Plan

----------------------------------------------------------

Plan hash value: 264906180


--------------------------------------------------------------------------

| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------

|   0 | SELECT STATEMENT  |      | 20473 |   559K|    47   (3)| 00:00:01 |

|*  1 |  TABLE ACCESS FULL| TT   | 20473 |   559K|    47   (3)| 00:00:01 |

--------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  1 - filter("ID"=99)



SQL> select * from tt where id=1;


40944 rows selected.



Execution Plan

----------------------------------------------------------

Plan hash value: 264906180


--------------------------------------------------------------------------

| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------

|   0 | SELECT STATEMENT  |      | 20473 |   559K|    47   (3)| 00:00:01 |

|*  1 |  TABLE ACCESS FULL| TT   | 20473 |   559K|    47   (3)| 00:00:01 |

--------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  1 - filter("ID"=1)


Wrong cardinality, wrong plan.


Summary: when column values are unevenly distributed, leaving histogram collection to the default (AUTO) behaviour may produce no histogram at all, and the resulting execution plan is wrong; manual intervention is required.


A. A height-balanced histogram introduces estimation error into the plan; the error shrinks as the number of buckets grows and grows as the data volume grows.

B. If the histogram type is NONE, queries on unevenly distributed columns can get seriously wrong plans.

C. The ideal case is a frequency histogram, but once count(distinct id) > 254 a frequency histogram is impossible; only a height-balanced one can be built, so raise the bucket count to 254 to keep the error as small as possible.

D. Histogram collection is wrapped inside the dbms_stats package; because the packaged strategy is one-size-fits-all it is not very precise, which is why the method_opt parameter is exposed for us to tune.

E. Gathering histograms on large tables costs resources, so consider collecting them only for indexed columns (FOR ALL INDEXED COLUMNS [size_clause]) and let the actual SQL workload decide which columns need histograms. Above all, focus on columns whose values are unevenly distributed! (A sketch follows.)
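
One practical way to apply points C and E above is to keep basic statistics on every column but build the histogram only on the skewed, indexed column. A minimal sketch (the combined method_opt string is believed valid, but check it against your version's documentation):

exec dbms_stats.gather_table_stats(user,'tt',estimate_percent=>100,method_opt=>'for all columns size 1 for columns id size 254');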


------------------------ A statistics-gathering strategy for a production database (outline) -------------


1. Build a configuration table — p_config


owner   table_name   estimate_percent   cascade   method_opt                               granularity            analyze_time
------- ------------ ------------------ --------- ---------------------------------------- ---------------------- -------------
SCOTT   TT           100                true      for all columns size skewonly            GLOBAL AND PARTITION
HR      T1           30                 true      for all indexed columns size skewonly    GLOBAL
...


 -------------------------------------------------------------------


2. Create a stored procedure that loops over the configuration table (a fuller sketch follows step 3)


for idx in (select * from p_config) loop

  dbms_stats.gather_table_stats(idx.owner,idx.table_name,estimate_percent=>idx.estimate_percent,cascade=>idx.cascade,method_opt=>idx.method_opt...);

end loop;


3. Wrap the procedure in a background job so the statistics are gathered on a schedule.
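
A fuller sketch of steps 2 and 3. The procedure and job names (sp_gather_stats, GATHER_STATS_JOB) are made up for illustration, the p_config columns are the ones shown above, and cascade is assumed to be stored as the text 'true'/'false':

create or replace procedure sp_gather_stats is
  l_cascade boolean;
begin
  for idx in (select * from p_config) loop
    l_cascade := (lower(idx.cascade) = 'true');
    dbms_stats.gather_table_stats(
      ownname          => idx.owner,
      tabname          => idx.table_name,
      estimate_percent => idx.estimate_percent,
      cascade          => l_cascade,
      method_opt       => idx.method_opt,
      granularity      => idx.granularity);
    -- remember when this table was last analyzed
    update p_config
       set analyze_time = sysdate
     where owner = idx.owner
       and table_name = idx.table_name;
  end loop;
  commit;
end;
/

-- step 3: run the procedure from a scheduled background job, e.g. nightly at 02:00
begin
  dbms_scheduler.create_job(
    job_name        => 'GATHER_STATS_JOB',
    job_type        => 'STORED_PROCEDURE',
    job_action      => 'SP_GATHER_STATS',
    repeat_interval => 'FREQ=DAILY;BYHOUR=2',
    enabled         => true);
end;
/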

------------------------------------------------------------------------


What happens to the plan when the table and index statistics are still there but the column statistics are gone?


1. Delete the column statistics

SQL> exec dbms_stats.delete_column_stats(user,'tt','id');


PL/SQL procedure successfully completed.


select * from user_tab_col_statistics where table_name='TT' and column_name='ID'  -- returns no rows


Confirm the table and index statistics are still there:

SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TT';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------

     40945          28        202 2013-08-08 14:49:51


SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TT';


   BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED

---------- ----------- ------------- -------------------

         1          80             2 2013-08-08 14:49:51


SQL> select * from tt where id=1;


40944 rows selected.



Execution Plan

----------------------------------------------------------

Plan hash value: 3656862534


--------------------------------------------------------------------------------------

| Id  | Operation                   | Name   | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |        |   409 | 11452 |    41   (0)| 00:00:01 |

|   1 |  TABLE ACCESS BY INDEX ROWID| TT     |   409 | 11452 |    41   (0)| 00:00:01 |

|*  2 |   INDEX RANGE SCAN          | IND_TT |   164 |       |    40   (0)| 00:00:01 |

--------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("ID"=1)


SQL> select count(1) from tt where id=1;


 COUNT(1)

----------

    40944


SQL> select * from tt where id=99;  -- at the current sampling level (2), missing column statistics do not trigger dynamic sampling; at level 3 they do



Execution Plan

----------------------------------------------------------

Plan hash value: 3656862534


--------------------------------------------------------------------------------------

| Id  | Operation                   | Name   | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |        |   409 | 11452 |    41   (0)| 00:00:01 |

|   1 |  TABLE ACCESS BY INDEX ROWID| TT     |   409 | 11452 |    41   (0)| 00:00:01 |

|*  2 |   INDEX RANGE SCAN          | IND_TT |   164 |       |    40   (0)| 00:00:01 |

--------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("ID"=99)



SQL> select count(1) from tt where id=99;


 COUNT(1)

----------

           1


The table and index statistics are still there, but the column statistics are gone (and with them any histogram). The CBO has no way of knowing how the column values are distributed, so it cannot produce a correct plan.


Raise the dynamic sampling level to 3:

SQL> show parameter dynamic


NAME                                 TYPE        VALUE

------------------------------------ ----------- ------------------------------

optimizer_dynamic_sampling           integer     2

SQL> alter system set optimizer_dynamic_sampling=3 scope=both;


System altered.


SQL> show parameter dynamic


NAME                                 TYPE        VALUE

------------------------------------ ----------- ------------------------------

optimizer_dynamic_sampling           integer     3


SQL> select * from tt where id=99;



Execution Plan

----------------------------------------------------------

Plan hash value: 3656862534


--------------------------------------------------------------------------------------

| Id  | Operation                   | Name   | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |        |     7 |   196 |     2   (0)| 00:00:01 |

|   1 |  TABLE ACCESS BY INDEX ROWID| TT     |     7 |   196 |     2   (0)| 00:00:01 |

|*  2 |   INDEX RANGE SCAN          | IND_TT |     7 |       |     1   (0)| 00:00:01 |

--------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("ID"=99)


Note

-----

  - dynamic sampling used for this statement


SQL> select * from tt where id=1;


40944 rows selected.



Execution Plan

----------------------------------------------------------

Plan hash value: 264906180


--------------------------------------------------------------------------

| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------

|   0 | SELECT STATEMENT  |      | 40938 |  1119K|    47   (3)| 00:00:01 |

|*  1 |  TABLE ACCESS FULL| TT   | 40938 |  1119K|    47   (3)| 00:00:01 |

--------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  1 - filter("ID"=1)


Note

-----

  - dynamic sampling used for this statement


Conclusion:

at dynamic sampling level >= 3, missing column statistics trigger dynamic sampling;

at level <= 2, they do not.
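
If changing the system-wide parameter is not desirable, the same effect can be requested for a single statement with the dynamic_sampling hint; a small sketch (the alias in the hint must match the table alias in the query):

select /*+ dynamic_sampling(t 3) */ * from tt t where id=99;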


------------------------ Clustering factor --------------------------------------


The clustering factor is an attribute of an index.


select index_name,clustering_factor from user_indexes  -- the clustering factor of every index


SQL> select index_name,clustering_factor from user_indexes where index_name='T6_IND';


INDEX_NAME                     CLUSTERING_FACTOR

------------------------------ -----------------

T6_IND                                       376


Definition: the clustering factor describes the relationship between the order of the index keys and the physical placement of the rows in the table. When consecutive index keys point to rows concentrated in few data blocks, the factor is small and index access is attractive; when consecutive keys point to many different blocks, because the table's row order differs greatly from the index order, the factor is large.


This metric has a big impact on execution plans: when a plan is behaving badly but the cardinality estimates alone cannot explain the bad choice, a problematic clustering factor or an unevenly distributed column behind a histogram is often the reason.


How is the clustering factor computed? The index is built on a column, so although the factor belongs to the index, it is effectively also a property of that column.


In essence, the clustering factor is the number of data-block visits made when the table rows are scanned in the order of the index keys.


create or replace function clustering_factor(
  p_owner       in varchar2,
  p_table_name  in varchar2,
  p_column_name in varchar2) return number is
  l_cursor            sys_refcursor;
  l_clustering_factor integer := 0;
  l_block_nr          integer := 0;
  l_file_nr           integer := 0;
  l_previous_block_nr integer := 0;
  l_previous_file_nr  integer := 0;
begin
  -- walk the table in index-key order (order by the column) and record,
  -- for every row, which datafile and block it lives in
  open l_cursor for
    'select dbms_rowid.rowid_block_number(rowid) block_nr, '||
    'dbms_rowid.rowid_to_absolute_fno(rowid,'''||p_owner||''','''||p_table_name||''') file_nr '||
    'from '||p_owner||'.'||p_table_name||' '||
    'where '||p_column_name||' is not null '||
    'order by '||p_column_name;
  loop
    fetch l_cursor into l_block_nr, l_file_nr;
    exit when l_cursor%notfound;
    -- each time the current row sits in a different block (or file) than the
    -- previous one, another block visit is needed: that is what the factor counts
    if l_block_nr <> l_previous_block_nr or l_file_nr <> l_previous_file_nr then
      l_clustering_factor := l_clustering_factor + 1;
    end if;
    l_previous_block_nr := l_block_nr;
    l_previous_file_nr  := l_file_nr;
  end loop;
  close l_cursor;
  return l_clustering_factor;
end;
/


Verification:


SQL> select index_name,clustering_factor from user_indexes where index_name='T6_IND';


INDEX_NAME                     CLUSTERING_FACTOR

------------------------------ -----------------

T6_IND                                       376


select * from user_ind_columns  -- maps indexes to their tables and columns


SQL> select clustering_factor('HR','T6','OBJECT_ID') from dual;


CLUSTERING_FACTOR('HR','T6','OBJECT_ID')

----------------------------------------

                                    376


SQL> select index_name,clustering_factor from user_indexes where index_name='T3_IND';


INDEX_NAME                     CLUSTERING_FACTOR

------------------------------ -----------------

T3_IND                                       353



SQL> select clustering_factor('HR','T5','OBJECT_ID') from dual;


CLUSTERING_FACTOR('HR','T5','OBJECT_ID')

----------------------------------------

                                    353


Case study: how the clustering factor affects the execution plan.


SQL> drop table t;


Table dropped.


SQL> create table t as select object_id,object_name from all_objects;


Table created.


SQL> create index t_ind on t(object_id);


Index created.


SQL> drop table t1;


Table dropped.


SQL> create table t1 as select * from t where rownum=1;


Table created.


SQL> alter table t1 minimize records_per_block;  -- cap each block at the current maximum rows per block (1 row here), so the data spreads across many blocks


Table altered.


SQL> insert into t1 select * from t;


40944 rows created.


SQL> commit;


Commit complete.


SQL> create index t1_ind on t1(object_id);


Index created.


SQL> exec dbms_stats.gather_table_stats(user,'t',cascade=>true);


PL/SQL procedure successfully completed.


SQL> exec dbms_stats.gather_table_stats(user,'t1',cascade=>true);


PL/SQL procedure successfully completed.


SQL> select table_name,num_rows,blocks from user_tables where table_name in('T','T1');


TABLE_NAME                       NUM_ROWS     BLOCKS

------------------------------ ---------- ----------

T                                   40944        213

T1                                  40945      21320


SQL> select table_name,index_name,num_rows,leaf_blocks,clustering_factor from user_indexes where table_name in('T','T1');


TABLE_NAME                     INDEX_NAME                       NUM_ROWS LEAF_BLOCKS CLUSTERING_FACTOR

------------------------------ ------------------------------ ---------- ----------- -----------------

T                              T_IND                               40944          91               353

T1                             T1_IND                              40945          91             33469


Conclusion: the more data blocks the rows are spread across, the larger the index's clustering factor.


SQL> select * from t where object_id<1000;


64 rows selected.



Execution Plan

----------------------------------------------------------

Plan hash value: 1376202287


-------------------------------------------------------------------------------------

| Id  | Operation                   | Name  | Rows  | Bytes | Cost (%CPU)| Time     |

-------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |       |   540 | 16200 |     8   (0)| 00:00:01 |

|   1 |  TABLE ACCESS BY INDEX ROWID| T     |   540 | 16200 |     8   (0)| 00:00:01 |

|*  2 |   INDEX RANGE SCAN          | T_IND |   540 |       |     3   (0)| 00:00:01 |

-------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("OBJECT_ID"<1000)



Statistics

----------------------------------------------------------

         0  recursive calls

         0  db block gets

        13  consistent gets

         0  physical reads

         0  redo size

      2603  bytes sent via SQL*Net to client

       444  bytes received via SQL*Net from client

         6  SQL*Net roundtrips to/from client

         0  sorts (memory)

         0  sorts (disk)

        64  rows processed


SQL> select * from t1 where object_id<1000;


65 rows selected.



Execution Plan

----------------------------------------------------------

Plan hash value: 2059591622


--------------------------------------------------------------------------------------

| Id  | Operation                   | Name   | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |        |   540 | 16200 |   445   (0)| 00:00:06 |

|   1 |  TABLE ACCESS BY INDEX ROWID| T1     |   540 | 16200 |   445   (0)| 00:00:06 |

|*  2 |   INDEX RANGE SCAN          | T1_IND |   540 |       |     3   (0)| 00:00:01 |

--------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("OBJECT_ID"<1000)



Statistics

----------------------------------------------------------

         0  recursive calls

         0  db block gets

        42  consistent gets

         0  physical reads

         0  redo size

      2608  bytes sent via SQL*Net to client

       444  bytes received via SQL*Net from client

         6  SQL*Net roundtrips to/from client

         0  sorts (memory)

         0  sorts (disk)

        65  rows processed


Conclusion: the access path is the same, but the index with the higher clustering factor causes more consistent gets and carries a higher cost.


If the table does not need to be visited by rowid (an index-only query), the clustering factor no longer matters:


SQL> select object_id from t where object_id<1000;


64 rows selected.



Execution Plan

----------------------------------------------------------

Plan hash value: 422821423


--------------------------------------------------------------------------

| Id  | Operation        | Name  | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------

|   0 | SELECT STATEMENT |       |   540 |  2700 |     3   (0)| 00:00:01 |

|*  1 |  INDEX RANGE SCAN| T_IND |   540 |  2700 |     3   (0)| 00:00:01 |

--------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  1 - access("OBJECT_ID"<1000)



Statistics

----------------------------------------------------------

         1  recursive calls

         0  db block gets

         7  consistent gets

         0  physical reads

         0  redo size

      1339  bytes sent via SQL*Net to client

       444  bytes received via SQL*Net from client

         6  SQL*Net roundtrips to/from client

         0  sorts (memory)

         0  sorts (disk)

        64  rows processed


SQL> select object_id from t1 where object_id<1000;


65 rows selected.



Execution Plan

----------------------------------------------------------

Plan hash value: 2474755989


---------------------------------------------------------------------------

| Id  | Operation        | Name   | Rows  | Bytes | Cost (%CPU)| Time     |

---------------------------------------------------------------------------

|   0 | SELECT STATEMENT |        |   540 |  2700 |     3   (0)| 00:00:01 |

|*  1 |  INDEX RANGE SCAN| T1_IND |   540 |  2700 |     3   (0)| 00:00:01 |

---------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  1 - access("OBJECT_ID"<1000)



Statistics

----------------------------------------------------------

         1  recursive calls

         0  db block gets

         7  consistent gets

         0  physical reads

         0  redo size

      1340  bytes sent via SQL*Net to client

       444  bytes received via SQL*Net from client

         6  SQL*Net roundtrips to/from client

         0  sorts (memory)

         0  sorts (disk)

        65  rows processed



The lower the row density per block, the more readily the optimizer chooses the index, but that does not necessarily mean the best performance.



---- How the clustering factor and the execution plan are affected when the table's stored row order does or does not match the index key order -----------


SQL> conn hr/hr

Connected.

SQL> create table t_colocated(id number,col2 varchar2(100));


Table created.


SQL> begin

 2  for i in 1..100000 loop

 3  insert into t_colocated values(i,rpad(dbms_random.random,95,'*'));   -- the ID values are inserted, and stored, in ascending order

 4  end loop;

 5  end;

 6  /


PL/SQL procedure successfully completed.


SQL> commit;


Commit complete.


SQL> alter table t_colocated add constraint pk_t_colocated primary key(id);


Table altered.


SQL> create table t_disorganized as select id,col2 from t_colocated order by col2; -- the ID values end up scattered


Table created.  -- t_disorganized is populated by cloning the data (CTAS)


SQL> alter table t_disorganized add constraint pk_t_disorganized primary key(id);


Table altered.


SQL> select table_name,index_name,num_rows,leaf_blocks,clustering_factor from user_indexes where index_name in(upper('pk_t_colocated'),upper('pk_t_disorganized'));


TABLE_NAME                     INDEX_NAME                       NUM_ROWS LEAF_BLOCKS CLUSTERING_FACTOR

------------------------------ ------------------------------ ---------- ----------- -----------------

T_COLOCATED                    PK_T_COLOCATED                     100000         208              1469

T_DISORGANIZED                 PK_T_DISORGANIZED                  100000         208             99935



SQL> select clustering_factor('HR',upper('t_disorganized'),'ID') from dual;


CLUSTERING_FACTOR('HR',UPPER('T_DISORGANIZED'),'ID')

----------------------------------------------------

                                              99935


SQL> select clustering_factor('HR',upper('t_colocated'),'ID') from dual;


CLUSTERING_FACTOR('HR',UPPER('T_COLOCATED'),'ID')

-------------------------------------------------

                                            1469


With the ID order scrambled, consecutive key values no longer sit in adjacent blocks; the clustering factor changes accordingly, and so does the execution plan:


SQL> set autotrace trace exp

SQL> set linesize 1000

SQL> select * from t_disorganized where id<100;


Execution Plan

----------------------------------------------------------

Plan hash value: 290015569


-------------------------------------------------------------------------------------------------

| Id  | Operation                   | Name              | Rows  | Bytes | Cost (%CPU)| Time     |

-------------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |                   |    99 |  6435 |   127   (0)| 00:00:02 |

|   1 |  TABLE ACCESS BY INDEX ROWID| T_DISORGANIZED    |    99 |  6435 |   127   (0)| 00:00:02 |

|*  2 |   INDEX RANGE SCAN          | PK_T_DISORGANIZED |    99 |       |     2   (0)| 00:00:01 |

-------------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("ID"<100)


Note

-----

  - dynamic sampling used for this statement


SQL> select * from t_colocated where id<100;


Execution Plan

----------------------------------------------------------

Plan hash value: 4204525375


----------------------------------------------------------------------------------------------

| Id  | Operation                   | Name           | Rows  | Bytes | Cost (%CPU)| Time     |

----------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT            |                |    99 |  6435 |     4   (0)| 00:00:01 |

|   1 |  TABLE ACCESS BY INDEX ROWID| T_COLOCATED    |    99 |  6435 |     4   (0)| 00:00:01 |

|*  2 |   INDEX RANGE SCAN          | PK_T_COLOCATED |    99 |       |     2   (0)| 00:00:01 |

----------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("ID"<100)


Note

-----

  - dynamic sampling used for this statement


------------------------- dbms_stats in detail ------------------------------------

DBMS_STATS.GATHER_TABLE_STATS (

  ownname          VARCHAR2,

  tabname          VARCHAR2,

  partname         VARCHAR2 DEFAULT NULL,

  estimate_percent NUMBER   DEFAULT to_estimate_percent_type

                                               (get_param('ESTIMATE_PERCENT')),

  block_sample     BOOLEAN  DEFAULT FALSE,

  method_opt       VARCHAR2 DEFAULT get_param('METHOD_OPT'),

  degree           NUMBER   DEFAULT to_degree_type(get_param('DEGREE')),

  granularity      VARCHAR2 DEFAULT GET_PARAM('GRANULARITY'),

  cascade          BOOLEAN  DEFAULT to_cascade_type(get_param('CASCADE')),

  stattab          VARCHAR2 DEFAULT NULL,

  statid           VARCHAR2 DEFAULT NULL,

  statown          VARCHAR2 DEFAULT NULL,

  no_invalidate    BOOLEAN  DEFAULT  to_no_invalidate_type (

                                    get_param('NO_INVALIDATE')),

  force            BOOLEAN DEFAULT FALSE);


degree — the degree of parallelism used for the analysis


Parallelism means several processes work on the task together.


SQL> exec dbms_stats.gather_table_stats(user,'t_disorganized',cascade=>true,degree=>5);  -- 5 slave processes run the analysis together


PL/SQL procedure successfully completed.


The object itself also carries a degree of parallelism:

SQL> select table_name,degree from user_tables;


TABLE_NAME                     DEGREE

------------------------------ --------------------

XX2                                     1

TX                                      1

T_COLOCATED                             1

T2                                      1

T3                                      1

T4                                      1

T5                                      1

T6                                      1

EMP                                     1

T7                                      1

T8                                      1

TEST1                                   1

TEST                                    1

T_DISORGANIZED                          1

T1                                      1

TT                                      1

T                                       1

COUNTRIES                               1

JOBS                                    1

LOCATIONS                               1

REGIONS                                 1

JOB_HISTORY                             1

EMPLOYEES                               1

DEPARTMENTS                             1


24 rows selected.


By default every table has DEGREE 1, i.e. no parallelism, so the degree parameter has to be specified when analyzing (unless the table's own degree is raised).


SQL> alter table t_disorganized parallel 5;


Table altered.


SQL> select table_name,degree from user_tables where table_name='T_DISORGANIZED';


TABLE_NAME                     DEGREE

------------------------------ --------------------

T_DISORGANIZED                          5


SQL> exec dbms_stats.gather_table_stats(user,'t_disorganized',cascade=>true);


PL/SQL procedure successfully completed. -- no need to specify degree now; the table's own degree of 5 is used by default


granularity — accepted values (a short example follows this list):

'ALL' - analyze everything: global, partition and subpartition level.

'AUTO' - the default; Oracle chooses the granularity automatically based on the partitioning type.

'DEFAULT' - deprecated in 10g.

'GLOBAL' - global (table-level) statistics only, no partitions or subpartitions.

'GLOBAL AND PARTITION' - global plus partition-level statistics, but no subpartitions.

'PARTITION' - partition-level statistics only.

'SUBPARTITION' - subpartition-level statistics only.
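
For example (the same calls reappear in the case study below):

exec dbms_stats.gather_table_stats(user,'ttx',partname=>'pm',granularity=>'partition');   -- one partition only, global untouched

exec dbms_stats.gather_table_stats(user,'ttx',granularity=>'global');                     -- global level only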


Case study:

SQL> create table ttx(id int) partition by range(id)

 2  (partition p1 values less than(5),

 3  partition p2 values less than(10),

 4  partition p3 values less than(15)

 5  );


Table created.


SQL> select segment_name,partition_name,segment_type from user_segments where segment_name='TTX';


SEGMENT_NAME         PARTITION_NAME                 SEGMENT_TYPE

--------------------------------------------------------------------------------- ------------------------------ ------

TTX                  P1                             TABLE PARTITION

TTX                  P2                             TABLE PARTITION

TTX                  P3                             TABLE PARTITION


Physically the table consists of three segments; the value of ID determines which segment a row is stored in.


How the data is stored:

SQL> insert into ttx values(1);


1 row created.


SQL> insert into ttx values(6);


1 row created.


SQL> insert into ttx values(11);


1 row created.


SQL> commit;


Commit complete.


SQL> select * from ttx;


       ID

----------

        1

        6

       11


SQL> select * from ttx partition(p1);


       ID

----------

        1


SQL> select * from ttx partition(p2);


       ID

----------

        6


SQL> select * from ttx partition(p3);


       ID

----------

       11


How are indexes stored on a partitioned table?
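
The transcript does not show how the first IND_TTX was created; presumably it was built without the LOCAL keyword, something like:

create index ind_ttx on ttx(id);   -- no LOCAL keyword: one non-partitioned (global) index segment

which is why the next query returns a single segment with no partition name: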


SQL> select segment_name,partition_name,segment_type from user_segments where segment_name='IND_TTX';   -- a global index: one segment, no partitions


SEGMENT_NAME        PARTITION_NAME                 SEGMENT_TYPE

--------------------------------------------------------------------------------- --------------------------

IND_TTX                                            INDEX



SQL> drop index ind_ttx;


Index dropped.


SQL> create index ind_ttx on ttx(id) local;


Index created.


SQL> select segment_name,partition_name,segment_type from user_segments where segment_name='IND_TTX';


SEGMENT_NAME      PARTITION_NAME                 SEGMENT_TYPE

--------------------------------------------------------------------------------- ----------------------------

IND_TTX           P1                             INDEX PARTITION

IND_TTX           P2                             INDEX PARTITION

IND_TTX           P3                             INDEX PARTITION


A partitioned (local) index: the index is partitioned the same way as the table, giving 3 segments.


Gather statistics:

SQL> exec dbms_stats.gather_table_stats(user,'ttx',cascade=>true);


PL/SQL procedure successfully completed.


Confirm that both the table-level and the partition-level statistics exist.


Global (table-level) statistics:

SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------

        3           3         15 2013-08-09 10:07:59


Partition-level statistics:

SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';


PARTITION_NAME                   NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

------------------------------ ---------- ----------- ---------- -------------------

P1                                      1           3          5 2013-08-09 10:07:59

P2                                      1           3          5 2013-08-09 10:07:59

P3                                      1           3          5 2013-08-09 10:07:59


Each partition holds 1 row; each partition segment has one extent, and each extent has 8 data blocks:

select * from user_extents where partition_name='P1'


So why do the statistics report only 5 blocks? The first three blocks of the segment are space-management (bitmap) blocks.


Confirm that the index and index-partition statistics exist as well.

SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TTX';


   BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED

---------- ----------- ------------- -------------------

        0           3             3 2013-08-09 10:07:59



SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';


PARTITION_NAME                   NUM_ROWS     BLEVEL LEAF_BLOCKS LAST_ANALYZED

------------------------------ ---------- ---------- ----------- -------------------

P1                                      1          0           1 2013-08-09 10:07:59

P2                                      1          0           1 2013-08-09 10:07:59

P3                                      1          0           1 2013-08-09 10:07:59


Add a partition to the table

SQL> alter table ttx add partition pm values less than(maxvalue);


Table altered.


Load data into the new partition


SQL> begin

 2  for i in 1..10000 loop

 3  insert into ttx values(16);

 4  end loop;

 5  end;

 6  /


PL/SQL procedure successfully completed.


SQL> commit;


Commit complete.


SQL> select count(1) from ttx partition(pm);


 COUNT(1)

----------

    10000


Check the index-partition statistics:

SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';


PARTITION_NAME                   NUM_ROWS     BLEVEL LEAF_BLOCKS LAST_ANALYZED

------------------------------ ---------- ---------- ----------- -------------------

P1                                      1          0           1 2013-08-09 10:07:59

P2                                      1          0           1 2013-08-09 10:07:59

P3                                      1          0           1 2013-08-09 10:07:59

PM                                                                      -- the new partition has no statistics yet


Check the table's global statistics

SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------

        3           3         15 2013-08-09 10:07:59    -- the global statistics have not changed


Impact on the execution plan:

SQL> select * from ttx where id=16;


Execution Plan

----------------------------------------------------------

Plan hash value: 3737425109


--------------------------------------------------------------------------------------------------

| Id  | Operation              | Name    | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

--------------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT       |         |     1 |     3 |     1   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE SINGLE|         |     1 |     3 |     1   (0)| 00:00:01 |     4 |     4 |

|*  2 |   INDEX RANGE SCAN     | IND_TTX |     1 |     3 |     1   (0)| 00:00:01 |     4 |     4 |

--------------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - access("ID"=16)


Pstart — the first partition the scan reads

Pstop — the last partition the scan reads


The Rows estimate is badly wrong: the newly added partition was never analyzed, so the CBO makes a bad decision.


A. Analyze only the partition, not the global level

SQL> exec dbms_stats.gather_table_stats(user,'ttx',partname=>'pm',estimate_percent=>100,granularity=>'partition');


PL/SQL procedure successfully completed.


Check the partition statistics:


SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';


PARTITION_NAME                   NUM_ROWS     BLEVEL LEAF_BLOCKS LAST_ANALYZED

------------------------------ ---------- ---------- ----------- -------------------

P1                                      1          0           1 2013-08-09 10:07:59

P2                                      1          0           1 2013-08-09 10:07:59

P3                                      1          0           1 2013-08-09 10:07:59

PM                                  10000          1          27 2013-08-09 10:23:32


SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';


PARTITION_NAME                   NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

------------------------------ ---------- ----------- ---------- -------------------

P1                                      1           3          5 2013-08-09 10:07:59

P2                                      1           3          5 2013-08-09 10:07:59

P3                                      1           3          5 2013-08-09 10:07:59

PM                                  10000           3         20 2013-08-09 10:23:32


Confirm the global statistics were not re-gathered:

SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------

        3           3         15 2013-08-09 10:07:59


SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TTX';


   BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED

---------- ----------- ------------- -------------------

        0           3             3 2013-08-09 10:07:59


SQL> select * from ttx where id=16;  -- now the execution plan is correct


Execution Plan

----------------------------------------------------------

Plan hash value: 701592076


-----------------------------------------------------------------------------------------------

| Id  | Operation              | Name | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

-----------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT       |      |  9999 | 29997 |     6   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE SINGLE|      |  9999 | 29997 |     6   (0)| 00:00:01 |     4 |     4 |

|*  2 |   TABLE ACCESS FULL    | TTX  |  9999 | 29997 |     6   (0)| 00:00:01 |     4 |     4 |

-----------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - filter("ID"=16)


In what situations can the plan still go wrong when the global level has not been analyzed?


Make a change that affects the whole table:

SQL> alter table ttx add object_name varchar2(20);


Table altered.


SQL> update ttx set object_name='AAAA';


10003 rows updated.


SQL> commit;


Commit complete.


SQL> create index ind2_ttx on ttx(object_name);


Index created.


SQL> exec dbms_stats.gather_table_stats(user,'ttx',partname=>'pm',estimate_percent=>100,granularity=>'partition');


PL/SQL procedure successfully completed. -- partition-level analysis only, no global


SQL> select * from ttx where id=16;  -- a query on the ID column is still fine


Execution Plan

----------------------------------------------------------

Plan hash value: 701592076


-----------------------------------------------------------------------------------------------

| Id  | Operation              | Name | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

-----------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT       |      |  9999 | 7××× |    11   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE SINGLE|      |  9999 | 7××× |    11   (0)| 00:00:01 |     4 |     4 |

|*  2 |   TABLE ACCESS FULL    | TTX  |  9999 | 7××× |    11   (0)| 00:00:01 |     4 |     4 |

-----------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - filter("ID"=16)


SQL> select * from ttx where object_name='AAAA';  -- but a query on the OBJECT_NAME column goes wrong


Execution Plan

----------------------------------------------------------

Plan hash value: 1144724227


--------------------------------------------------------------------------------------------

| Id  | Operation           | Name | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

--------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT    |      |     3 |     9 |     5   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE ALL|      |     3 |     9 |     5   (0)| 00:00:01 |     1 |     4 | -- partitions 1 through 4 are scanned

|*  2 |   TABLE ACCESS FULL | TTX  |     3 |     9 |     5   (0)| 00:00:01 |     1 |     4 |

--------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - filter("OBJECT_NAME"='AAAA')


Note

-----

  - dynamic sampling used for this statement


With partition-only analysis, a query confined to the analyzed partition gets a correct plan; a query that touches the whole table gets a wrong one.


SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'global');  -- global-level analysis


PL/SQL procedure successfully completed.


SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------

    10003           8         58 2013-08-09 10:31:14


SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TTX';


   BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED

---------- ----------- ------------- -------------------

        1          30             5 2013-08-09 10:31:14

        1          28             1 2013-08-09 10:31:14


SQL> select * from ttx where object_name='AAAA';


Execution Plan

----------------------------------------------------------

Plan hash value: 1144724227


--------------------------------------------------------------------------------------------

| Id  | Operation           | Name | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

--------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT    |      | 10003 | 80024 |    14   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE ALL|      | 10003 | 80024 |    14   (0)| 00:00:01 |     1 |     4 |

|*  2 |   TABLE ACCESS FULL | TTX  | 10003 | 80024 |    14   (0)| 00:00:01 |     1 |     4 |

--------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - filter("OBJECT_NAME"='AAAA')


Summary: even when the partition level has statistics, skipping the global analysis still leads to wrong plans for queries that touch global data, whenever that data or the table structure has changed since the last global analysis.


B. Analyze only the global level, not the partitions


Delete the data in the last partition

SQL> delete from ttx where id>15;


10000 rows deleted.


SQL> commit;


Commit complete.


First analyze both the partitions and the global level


SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,cascade=>true);


PL/SQL procedure successfully completed.


SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------

        3           8         58 2013-08-09 10:35:43


SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';


PARTITION_NAME                   NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

------------------------------ ---------- ----------- ---------- -------------------

P1                                      1           8          5 2013-08-09 10:35:43

P2                                      1           8          5 2013-08-09 10:35:43

P3                                      1           8          5 2013-08-09 10:35:43

PM                                      0           0         43 2013-08-09 10:35:43


Load data into the fourth partition


SQL> begin

 2  for i in 1..10000 loop

 3  insert into ttx values(16,'');

 4  end loop;

 5  end;

 6  /


PL/SQL procedure successfully completed.


SQL> commit;


Commit complete.


Make the data heavily skewed

SQL> update ttx set id=1000 where id=16 and rownum=1;


1 row updated.


SQL> commit;


Commit complete.


Analyze only the global level, not the partitions, and collect no histograms


SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'global',method_opt=>'for all columns size 1',cascade=>true);


PL/SQL procedure successfully completed.


Confirm the global statistics are fresh while the partitions were not re-analyzed:


SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------

    10003           4         58 2013-08-09 10:41:08


SQL> select blevel,leaf_blocks,distinct_keys,last_analyzed from user_indexes where table_name='TTX';


   BLEVEL LEAF_BLOCKS DISTINCT_KEYS LAST_ANALYZED

---------- ----------- ------------- -------------------

        1          31             5 2013-08-09 10:41:08

        1           1             1 2013-08-09 10:41:08


SQL>  select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';


PARTITION_NAME                   NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

------------------------------ ---------- ----------- ---------- -------------------

P1                                      1           8          5 2013-08-09 10:35:43

P2                                      1           8          5 2013-08-09 10:35:43

P3                                      1           8          5 2013-08-09 10:35:43

PM                                      0           0         43 2013-08-09 10:35:43


SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';


PARTITION_NAME                   NUM_ROWS     BLEVEL LEAF_BLOCKS LAST_ANALYZED

------------------------------ ---------- ---------- ----------- -------------------

P1                                      1          0           1 2013-08-09 10:35:43

P2                                      1          0           1 2013-08-09 10:35:43

P3                                      1          0           1 2013-08-09 10:35:43

PM                                      0          1           0 2013-08-09 10:35:43


SQL> select * from ttx where id=16;


Execution Plan

----------------------------------------------------------

Plan hash value: 2572159449


--------------------------------------------------------------------------------------------------------------

| Id  | Operation                          | Name    | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

--------------------------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT                   |         |     1 |    25 |     1   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE SINGLE            |         |     1 |    25 |     1   (0)| 00:00:01 |     4 |     4 |

|   2 |   TABLE ACCESS BY LOCAL INDEX ROWID| TTX     |     1 |    25 |     1   (0)| 00:00:01 |     4 |     4 |

|*  3 |    INDEX RANGE SCAN                | IND_TTX |     1 |       |     1   (0)| 00:00:01 |     4 |     4 |

--------------------------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  3 - access("ID"=16)


Now add histogram information.


SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'global',method_opt=>'for all columns size skewonly',cascade=>true);


PL/SQL procedure successfully completed.


select * from user_tab_col_statistics where table_name='TTX'  -- confirm histograms now exist


SQL> select * from ttx where id=16;


Execution Plan

----------------------------------------------------------

Plan hash value: 2572159449


--------------------------------------------------------------------------------------------------------------

| Id  | Operation                          | Name    | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

--------------------------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT                   |         |     1 |    25 |     1   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE SINGLE            |         |     1 |    25 |     1   (0)| 00:00:01 |     4 |     4 |

|   2 |   TABLE ACCESS BY LOCAL INDEX ROWID| TTX     |     1 |    25 |     1   (0)| 00:00:01 |     4 |     4 |

|*  3 |    INDEX RANGE SCAN                | IND_TTX |     1 |       |     1   (0)| 00:00:01 |     4 |     4 |

--------------------------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  3 - access("ID"=16)


The plan is still wrong, so the histogram was not the main cause; the missing partition statistics were.


SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'partition',method_opt=>'for all columns size 1',cascade=>true);


PL/SQL procedure successfully completed.  -- analyze the partitions, but collect no histograms


SQL> select partition_name,num_rows,avg_row_len,blocks,last_analyzed from user_tab_partitions where table_name='TTX';


PARTITION_NAME                   NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

------------------------------ ---------- ----------- ---------- -------------------

P1                                      1           8          5 2013-08-09 10:46:33

P2                                      1           8          5 2013-08-09 10:46:33

P3                                      1           8          5 2013-08-09 10:46:33

PM                                  10000           3         43 2013-08-09 10:46:33


SQL> select partition_name,num_rows,blevel,leaf_blocks,last_analyzed from user_ind_partitions where index_name='IND_TTX';


PARTITION_NAME                   NUM_ROWS     BLEVEL LEAF_BLOCKS LAST_ANALYZED

------------------------------ ---------- ---------- ----------- -------------------

P1                                      1          0           1 2013-08-09 10:46:33

P2                                      1          0           1 2013-08-09 10:46:33

P3                                      1          0           1 2013-08-09 10:46:33

PM                                  10000          1          28 2013-08-09 10:46:33


SQL> select * from ttx where id=16;


Execution Plan

----------------------------------------------------------

Plan hash value: 701592076


-----------------------------------------------------------------------------------------------

| Id  | Operation              | Name | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

-----------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT       |      |  5000 | 15000 |    11   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE SINGLE|      |  5000 | 15000 |    11   (0)| 00:00:01 |     4 |     4 |

|*  2 |   TABLE ACCESS FULL    | TTX  |  5000 | 15000 |    11   (0)| 00:00:01 |     4 |     4 |

-----------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - filter("ID"=16)


With only 1 bucket the optimizer assumes a uniform distribution: the partition's maximum value is 1000, its minimum is 16, and it holds 10000 rows in total, so each value is estimated at about 5000 rows.


SQL> exec dbms_stats.gather_table_stats(user,'ttx',estimate_percent=>100,granularity=>'partition',method_opt=>'for all columns size skewonly',cascade=>true);  -- re-gather, this time with histograms


PL/SQL procedure successfully completed.


SQL> select * from ttx where id=16;


Execution Plan

----------------------------------------------------------

Plan hash value: 701592076


-----------------------------------------------------------------------------------------------

| Id  | Operation              | Name | Rows  | Bytes | Cost (%CPU)| Time     | Pstart| Pstop |

-----------------------------------------------------------------------------------------------

|   0 | SELECT STATEMENT       |      |  9999 | 29997 |    11   (0)| 00:00:01 |       |       |

|   1 |  PARTITION RANGE SINGLE|      |  9999 | 29997 |    11   (0)| 00:00:01 |     4 |     4 |

|*  2 |   TABLE ACCESS FULL    | TTX  |  9999 | 29997 |    11   (0)| 00:00:01 |     4 |     4 |

-----------------------------------------------------------------------------------------------


Predicate Information (identified by operation id):

---------------------------------------------------


  2 - filter("ID"=16)


Only when both the global level and the partitions are analyzed do we get accurate execution plans. The example above is deliberately extreme and magnifies every possible problem, which is why the effect of the fix is so visible.


In a real environment, however, system load may not allow frequent global analysis of large tables. What strategy should we adopt?



Suggested strategy:

1. If all SQL statements are satisfied within single partitions, there is no need to analyze the global level; analyzing the newly added partition is enough.
   If some SQL crosses partitions, consider how large a share of the data the new partition holds: if it is only a small fraction, global analysis can be skipped;
   if the new partition accounts for a large share, both global and partition analysis are needed.

2. If the database is not very large to begin with, running global plus partition analysis every time is perfectly acceptable.

3. If the column values are evenly distributed, consider switching histogram collection off to reduce the cost of the global analysis.

------------------------------------------------------------------------

How dbms_stats can save statistics into a user table:


A. Create a table to hold the statistics


SQL> exec dbms_stats.create_stat_table(user,'stat_tab','mytbs3');


PL/SQL procedure successfully completed.


SQL> set linesize 100

SQL> desc stat_tab  -- this table is only a dump area for statistics; the meaning of its columns does not matter here

Name                                                  Null?    Type

----------------------------------------------------- -------- ------------------------------------

STATID                                                         VARCHAR2(30)

TYPE                                                           CHAR(1)

VERSION                                                        NUMBER

FLAGS                                                          NUMBER

C1                                                             VARCHAR2(30)

C2                                                             VARCHAR2(30)

C3                                                             VARCHAR2(30)

C4                                                             VARCHAR2(30)

C5                                                             VARCHAR2(30)

N1                                                             NUMBER

N2                                                             NUMBER

N3                                                             NUMBER

N4                                                             NUMBER

N5                                                             NUMBER

N6                                                             NUMBER

N7                                                             NUMBER

N8                                                             NUMBER

N9                                                             NUMBER

N10                                                            NUMBER

N11                                                            NUMBER

N12                                                            NUMBER

D1                                                             DATE

R1                                                             RAW(32)

R2                                                             RAW(32)

CH1                                                            VARCHAR2(1000)


B. Dropping the table again


SQL> exec dbms_stats.drop_stat_table(user,'stat_tab');


PL/SQL procedure successfully completed.


SQL> desc stat_tab

ERROR:

ORA-04043: object stat_tab does not exist


SQL> exec dbms_stats.create_stat_table(user,'stat_tab','mytbs3');


PL/SQL procedure successfully completed.


C. Saving the statistics — copy them into this table.


Scenario A: dump the statistics into the table while they are being gathered.


SQL> exec dbms_stats.gather_table_stats(user,'ttx',stattab=>'stat_tab',cascade=>true);


PL/SQL procedure successfully completed.


SQL> select count(1) from stat_tab;


 COUNT(1)

----------

       26


Scenario B: the dictionary already holds statistics and re-gathering is not an option right now; export the existing statistics into the (already created) table instead.


SQL> truncate table stat_tab;


Table truncated.


SQL> exec dbms_stats.export_table_stats(user,'ttx',stattab=>'stat_tab',cascade=>true);


PL/SQL procedure successfully completed.


SQL> select count(1) from stat_tab;


 COUNT(1)

----------

       26


One stattab per schema is enough; the statistics of all tables, indexes and columns can be stored in that single table.
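
If several snapshots of the same object's statistics must coexist in one stattab, the statid parameter of the export/import procedures can label them. A small sketch (the label BEFORE_LOAD is invented for illustration):

exec dbms_stats.export_table_stats(user,'ttx',stattab=>'stat_tab',statid=>'BEFORE_LOAD',cascade=>true);

exec dbms_stats.import_table_stats(user,'ttx',stattab=>'stat_tab',statid=>'BEFORE_LOAD',cascade=>true);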


Scenario C: the statistics of table TTX were deleted by mistake; restore them from the stat table


SQL> exec dbms_stats.delete_table_stats(user,'ttx');


PL/SQL procedure successfully completed.


SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------


Restore the statistics

SQL> exec dbms_stats.import_table_stats(user,'ttx',stattab=>'stat_tab',cascade=>true);


PL/SQL procedure successfully completed.


SQL> select num_rows,avg_row_len,blocks,last_analyzed from user_tables where table_name='TTX';


 NUM_ROWS AVG_ROW_LEN     BLOCKS LAST_ANALYZED

---------- ----------- ---------- -------------------

    10003           4         58 2013-08-09 11:25:43


The same mechanism handles statistics during data migration or data splitting.
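
A sketch of that workflow, assuming the statistics should follow the data to another database (the dump file name is invented):

-- on the source database: dump the dictionary statistics into the transfer table
exec dbms_stats.export_table_stats(user,'ttx',stattab=>'stat_tab',cascade=>true);

-- move STAT_TAB to the target database, e.g. with export/import from the OS shell:
-- $ exp hr/hr tables=stat_tab file=stat_tab.dmp
-- $ imp hr/hr file=stat_tab.dmp full=y

-- on the target database: load the statistics back into the dictionary
exec dbms_stats.import_table_stats(user,'ttx',stattab=>'stat_tab',cascade=>true);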


--------------------------------

Locking statistics

Once an execution plan has been tuned and is stable, you may want the statistics preserved and protected from changes by other users; they can be locked.


SQL> exec dbms_stats.lock_table_stats(user,'ttx');


PL/SQL procedure successfully completed.


Any further attempt to analyze the table now raises an error.


SQL> exec dbms_stats.gather_table_stats(user,'ttx',cascade=>true);

BEGIN dbms_stats.gather_table_stats(user,'ttx',cascade=>true); END;


*

ERROR at line 1:

ORA-20005: object statistics are locked (stattype = ALL)

ORA-06512: at "SYS.DBMS_STATS", line 13437

ORA-06512: at "SYS.DBMS_STATS", line 13457

ORA-06512: at line 1


Workarounds:

1. Unlock the statistics

SQL> exec dbms_stats.unlock_table_stats(user,'ttx');


PL/SQL procedure successfully completed.


SQL> exec dbms_stats.gather_table_stats(user,'ttx',cascade=>true);


PL/SQL procedure successfully completed.


2. Force an overwrite

SQL> exec dbms_stats.gather_table_stats(user,'ttx',cascade=>true,force=>true);


PL/SQL procedure successfully completed.

----------------------------------------------

Setting statistics manually


When to use it:

1. When a statistic is wrong and ruins the execution plan, or gathering is not allowed at the moment, the statistics can be set by hand.

2. In a test or development environment, it is a way to simulate production statistics while debugging execution plans.


Purpose: debugging execution plans.


Set table statistics

DBMS_STATS.SET_TABLE_STATS (

  ownname       VARCHAR2,

  tabname       VARCHAR2,

  partname      VARCHAR2 DEFAULT NULL,

  stattab       VARCHAR2 DEFAULT NULL,

  statid        VARCHAR2 DEFAULT NULL,

  numrows       NUMBER   DEFAULT NULL,

  numblks       NUMBER   DEFAULT NULL,

  avgrlen       NUMBER   DEFAULT NULL,

  flags         NUMBER   DEFAULT NULL,

  statown       VARCHAR2 DEFAULT NULL,

  no_invalidate BOOLEAN  DEFAULT to_no_invalidate_type (

                                    get_param('NO_INVALIDATE')),

  cachedblk     NUMBER    DEFAULT NULL,

  cachehit      NUMBER    DEFAULT NULL,

  force         BOOLEAN   DEFAULT FALSE);


Set column statistics

DBMS_STATS.SET_COLUMN_STATS (

  ownname       VARCHAR2,

  tabname       VARCHAR2,

  colname       VARCHAR2,

  partname      VARCHAR2 DEFAULT NULL,

  stattab       VARCHAR2 DEFAULT NULL,

  statid        VARCHAR2 DEFAULT NULL,

  distcnt       NUMBER DEFAULT NULL,

  density       NUMBER DEFAULT NULL,

  nullcnt       NUMBER DEFAULT NULL,

  srec          StatRec DEFAULT NULL,

  avgclen       NUMBER DEFAULT NULL,

  flags         NUMBER DEFAULT NULL,

  statown       VARCHAR2 DEFAULT NULL,

  no_invalidate BOOLEAN DEFAULT to_no_invalidate_type(

                                   get_param('NO_INVALIDATE')),

  force         BOOLEAN DEFAULT FALSE);


Set index statistics

DBMS_STATS.SET_INDEX_STATS (

  ownname       VARCHAR2,

  indname       VARCHAR2,

  partname      VARCHAR2  DEFAULT NULL,

  stattab       VARCHAR2  DEFAULT NULL,

  statid        VARCHAR2  DEFAULT NULL,

  numrows       NUMBER    DEFAULT NULL,

  numlblks      NUMBER    DEFAULT NULL,

  numdist       NUMBER    DEFAULT NULL,

  avglblk       NUMBER    DEFAULT NULL,

  avgdblk       NUMBER    DEFAULT NULL,

  clstfct       NUMBER    DEFAULT NULL,

  indlevel      NUMBER    DEFAULT NULL,

  flags         NUMBER    DEFAULT NULL,

  statown       VARCHAR2  DEFAULT NULL,

  no_invalidate BOOLEAN   DEFAULT to_no_invalidate_type(

                                   get_param('NO_INVALIDATE')),

  guessq        NUMBER    DEFAULT NULL,

  cachedblk     NUMBER    DEFAULT NULL,

  cachehit      NUMBER    DEFAULT NULL,

  force         BOOLEAN   DEFAULT FALSE);



SQL> drop table t;


Table dropped.


SQL> create table t as select * from all_objects;


Table created.


SQL> exec dbms_stats.gather_table_stats(user,'t');


PL/SQL procedure successfully completed.


SQL> select * from t;  -- in the test environment


Execution Plan

----------------------------------------------------------

Plan hash value: 1601196873


--------------------------------------------------------------------------

| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------

|   0 | SELECT STATEMENT  |      | 40961 |  3800K|   134   (3)| 00:00:02 |

|   1 |  TABLE ACCESS FULL| T    | 40961 |  3800K|   134   (3)| 00:00:02 |

--------------------------------------------------------------------------


But on the production system this table holds 10 million rows.


SQL> exec dbms_stats.set_table_stats(user,'t',numrows=>10000000,numblks=>1000000,avgrlen=>178);


PL/SQL procedure successfully completed.


SQL> select * from t;


Execution Plan

----------------------------------------------------------

Plan hash value: 1601196873


--------------------------------------------------------------------------

| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |

--------------------------------------------------------------------------

|   0 | SELECT STATEMENT  |      |    10M|   953M|   220K  (1)| 00:44:04 |

|   1 |  TABLE ACCESS FULL| T    |    10M|   953M|   220K  (1)| 00:44:04 |

--------------------------------------------------------------------------
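
Column (and index) statistics can be faked in the same way so the simulation stays consistent; set_index_stats works analogously for indexes. A sketch with invented numbers matching the 10-million-row assumption above:

exec dbms_stats.set_column_stats(user,'t','object_id',distcnt=>10000000,density=>0.0000001,nullcnt=>0);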


-----------------------------------------------

------------------------

Next topic: how table joins work