场景:一个字段,重复值占的比较还是比较小的大约为1/4,由于执行计划的预估结果集与实际存较大偏差,导致连接方式错误,所以想通过直方图来纠正,结果发现无效果;
原因在于唯一值占总行的比例过高造成;
SQL> create table t1 as select object_id,object_name from dba_objects where object_id is not null;
Table created.
SQL> truncate table t1;
Table truncated.
--构造唯一值较少时数据
SQL> insert into t1 select object_id,object_name from dba_objects where object_id is not null and rownum<220;
219 rows created.
SQL> commit;
Commit complete.
--构造一些倾斜的数值:88,89分别为33行,31行;
SQL> update t1 set object_id=88 where object_id <35;
33 rows updated.
SQL> commit;
Commit complete.
SQL> update t1 set object_id=89 where object_id between 180 and 210;
31 rows updated.
SQL> commit;
Commit complete.
SQL> exec dbms_stats.gather_table_stats('AIKI','T1',CASCADE=>TRUE,METHOD_OPT=>'FOR COLUMNS SIZE 75 OBJECT_ID',ESTIMATE_PERCENT=>100);
PL/SQL procedure successfully completed.
--更新桶数为75
SQL> @sosi
Tables owned by AIKI
------------------------------
PT
T1
Please enter Name of Table Owner (Null = AIKI):
Please enter Table Name to show Statistics for: T1
***********
Table Level
***********
Table Number Empty Average Chain Average Global User Sample Date
Name of Rows Blocks Blocks Space Count Row Len Stats Stats Size MM-DD-YYYY
--------------- -------------- -------- ------------ ------- -------- ------- ------ ------ -------------- ----------
T1 219 5 0 0 0 15 YES NO 219 05-05-2014
Column Column Distinct Number Number Global User Sample Date
Name Details Values Density Buckets Nulls Stats Stats Size MM-DD-YYYY
------------------------- ------------------------ ------------ ------- ------- ---------- ------ ------ -------------- ----------
OBJECT_ID NUMBER(22) 155 0 75 0 YES NO 219 05-05-2014
OBJECT_NAME VARCHAR2(128) NO NO
***************
Partition Level
***************
***************
SubPartition Level
***************
SQL> select TABLE_NAME,COLUMN_NAME,HISTOGRAM from dba_tab_columns where table_name='T1' and owner='AIKI';
Table Column
Name Name HISTOGRAM
--------------- ------------------------- ---------------
T1 OBJECT_ID HEIGHT BALANCED
T1 OBJECT_NAME NONE
--可以看到预估值与实际非常接近,但唯一值占整个表的比例较小时;
SQL> set autot traceonly exp
SQL> select object_name from t1 where object_id=88;
Execution Plan
----------------------------------------------------------
Plan hash value: 3617692013
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 35 | 525 | 3 (0)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| T1 | 35 | 525 | 3 (0)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("OBJECT_ID"=88)
SQL> select count(1) from t1 where object_id=88;
Execution Plan
----------------------------------------------------------
Plan hash value: 3724264953
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 4 | 3 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 4 | | |
|* 2 | TABLE ACCESS FULL| T1 | 35 | 140 | 3 (0)| 00:00:01 |
---------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OBJECT_ID"=88)
--可以看到预估与实际非常接近;
SQL> set autot off
SQL> l
1* select count(1) from t1 where object_id=88
SQL> /
COUNT(1)
----------
34
--将唯一值所占总行的比例增大,增加插入10000行
SQL> insert into t1 select object_id,object_name from dba_objects where object_id is not null and rownum<10001;
SQL> commit;
SQL> exec dbms_stats.gather_table_stats('AIKI','T1',CASCADE=>TRUE,METHOD_OPT=>'FOR COLUMNS SIZE 254 OBJECT_ID',ESTIMATE_PERCENT=>100);
SQL> @sosi
SQL> set echo off
---桶数统计为254
Tables owned by AIKI
------------------------------
PT
T1
Please enter Name of Table Owner (Null = AIKI):
Please enter Table Name to show Statistics for: T1
***********
Table Level
***********
Table Number Empty Average Chain Average Global User Sample Date
Name of Rows Blocks Blocks Space Count Row Len Stats Stats Size MM-DD-YYYY
--------------- -------------- -------- ------------ ------- -------- ------- ------ ------ -------------- ----------
T1 10,219 43 0 0 0 22 YES NO 10,219 05-05-2014
Column Column Distinct Number Number Global User Sample Date
Name Details Values Density Buckets Nulls Stats Stats Size MM-DD-YYYY
------------------------- ------------------------ ------------ ------- ------- ---------- ------ ------ -------------- ----------
OBJECT_ID NUMBER(22) 10,000 0 254 0 YES NO 10,219 05-05-2014
OBJECT_NAME VARCHAR2(128) NO NO
***************
Partition Level
***************
***************
SubPartition Level
***************
SQL> select count(1) from t1 where object_id=88
2 /
COUNT(1)
----------
35
--可以明显看到,预估值变成1,如下所示,但实际为35;
SQL> set autot traceonly exp
SQL> select object_name from t1 where object_id=88;
Execution Plan
----------------------------------------------------------
Plan hash value: 3617692013
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 22 | 13 (0)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| T1 | 1 | 22 | 13 (0)| 00:00:01 |
--------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("OBJECT_ID"=88)
--以下用公式来计算下变化的原因
QL> col column_name for a30
SQL> /
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 0 2
OBJECT_ID 1 38
OBJECT_ID 2 59
OBJECT_ID 3 79
OBJECT_ID 4 88
OBJECT_ID 5 89
OBJECT_ID 6 109
OBJECT_ID 7 129
OBJECT_ID 8 150
OBJECT_ID 9 170
OBJECT_ID 10 202
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 11 233
OBJECT_ID 12 274
OBJECT_ID 13 315
OBJECT_ID 14 356
OBJECT_ID 15 398
OBJECT_ID 16 439
OBJECT_ID 17 480
OBJECT_ID 18 521
OBJECT_ID 19 562
OBJECT_ID 20 603
OBJECT_ID 21 644
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 22 685
OBJECT_ID 23 726
OBJECT_ID 24 767
OBJECT_ID 25 808
OBJECT_ID 26 849
OBJECT_ID 27 890
OBJECT_ID 28 931
OBJECT_ID 29 972
OBJECT_ID 30 1013
OBJECT_ID 31 1054
OBJECT_ID 32 1095
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 33 1136
OBJECT_ID 34 1177
OBJECT_ID 35 1218
OBJECT_ID 36 1259
OBJECT_ID 37 1300
OBJECT_ID 38 1341
OBJECT_ID 39 1382
OBJECT_ID 40 1423
OBJECT_ID 41 1464
OBJECT_ID 42 1505
OBJECT_ID 43 1546
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 44 1587
OBJECT_ID 45 1628
OBJECT_ID 46 1669
OBJECT_ID 47 1710
OBJECT_ID 48 1751
OBJECT_ID 49 1792
OBJECT_ID 50 1833
OBJECT_ID 51 1874
OBJECT_ID 52 1915
OBJECT_ID 53 1956
OBJECT_ID 54 1997
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 55 2038
OBJECT_ID 56 2079
OBJECT_ID 57 2120
OBJECT_ID 58 2161
OBJECT_ID 59 2202
OBJECT_ID 60 2242
OBJECT_ID 61 2282
OBJECT_ID 62 2322
OBJECT_ID 63 2362
OBJECT_ID 64 2402
OBJECT_ID 65 2442
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 66 2482
OBJECT_ID 67 2522
OBJECT_ID 68 2562
OBJECT_ID 69 2602
OBJECT_ID 70 2642
OBJECT_ID 71 2682
OBJECT_ID 72 2722
OBJECT_ID 73 2762
OBJECT_ID 74 2802
OBJECT_ID 75 2842
OBJECT_ID 76 2882
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 77 2922
OBJECT_ID 78 2962
OBJECT_ID 79 3002
OBJECT_ID 80 3042
OBJECT_ID 81 3082
OBJECT_ID 82 3122
OBJECT_ID 83 3162
OBJECT_ID 84 3202
OBJECT_ID 85 3242
OBJECT_ID 86 3282
OBJECT_ID 87 3322
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 88 3362
OBJECT_ID 89 3402
OBJECT_ID 90 3442
OBJECT_ID 91 3482
OBJECT_ID 92 3522
OBJECT_ID 93 3562
OBJECT_ID 94 3602
OBJECT_ID 95 3642
OBJECT_ID 96 3682
OBJECT_ID 97 3722
OBJECT_ID 98 3762
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 99 3802
OBJECT_ID 100 3842
OBJECT_ID 101 3882
OBJECT_ID 102 3922
OBJECT_ID 103 3962
OBJECT_ID 104 4002
OBJECT_ID 105 4042
OBJECT_ID 106 4082
OBJECT_ID 107 4122
OBJECT_ID 108 4162
OBJECT_ID 109 4202
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 110 4242
OBJECT_ID 111 4282
OBJECT_ID 112 4322
OBJECT_ID 113 4362
OBJECT_ID 114 4402
OBJECT_ID 115 4442
OBJECT_ID 116 4482
OBJECT_ID 117 4522
OBJECT_ID 118 4562
OBJECT_ID 119 4602
OBJECT_ID 120 4642
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 121 4682
OBJECT_ID 122 4722
OBJECT_ID 123 4762
OBJECT_ID 124 4802
OBJECT_ID 125 4842
OBJECT_ID 126 4882
OBJECT_ID 127 4922
OBJECT_ID 128 4962
OBJECT_ID 129 5002
OBJECT_ID 130 5042
OBJECT_ID 131 5082
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 132 5122
OBJECT_ID 133 5162
OBJECT_ID 134 5202
OBJECT_ID 135 5242
OBJECT_ID 136 5282
OBJECT_ID 137 5322
OBJECT_ID 138 5362
OBJECT_ID 139 5402
OBJECT_ID 140 5442
OBJECT_ID 141 5482
OBJECT_ID 142 5524
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 143 5564
OBJECT_ID 144 5604
OBJECT_ID 145 5644
OBJECT_ID 146 5684
OBJECT_ID 147 5724
OBJECT_ID 148 5764
OBJECT_ID 149 5804
OBJECT_ID 150 5844
OBJECT_ID 151 5884
OBJECT_ID 152 5924
OBJECT_ID 153 5965
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 154 6006
OBJECT_ID 155 6046
OBJECT_ID 156 6086
OBJECT_ID 157 6126
OBJECT_ID 158 6166
OBJECT_ID 159 6206
OBJECT_ID 160 6246
OBJECT_ID 161 6286
OBJECT_ID 162 6329
OBJECT_ID 163 6372
OBJECT_ID 164 6418
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 165 6461
OBJECT_ID 166 6505
OBJECT_ID 167 6549
OBJECT_ID 168 6591
OBJECT_ID 169 6632
OBJECT_ID 170 6672
OBJECT_ID 171 6712
OBJECT_ID 172 6752
OBJECT_ID 173 6792
OBJECT_ID 174 6832
OBJECT_ID 175 6872
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 176 6912
OBJECT_ID 177 6952
OBJECT_ID 178 7000
OBJECT_ID 179 7040
OBJECT_ID 180 7080
OBJECT_ID 181 7120
OBJECT_ID 182 7160
OBJECT_ID 183 7200
OBJECT_ID 184 7240
OBJECT_ID 185 7280
OBJECT_ID 186 7320
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 187 7360
OBJECT_ID 188 7400
OBJECT_ID 189 7440
OBJECT_ID 190 7480
OBJECT_ID 191 7520
OBJECT_ID 192 7560
OBJECT_ID 193 7600
OBJECT_ID 194 7640
OBJECT_ID 195 7680
OBJECT_ID 196 7720
OBJECT_ID 197 7760
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 198 7800
OBJECT_ID 199 7840
OBJECT_ID 200 7880
OBJECT_ID 201 7920
OBJECT_ID 202 7960
OBJECT_ID 203 8000
OBJECT_ID 204 8040
OBJECT_ID 205 8080
OBJECT_ID 206 8120
OBJECT_ID 207 8160
OBJECT_ID 208 8200
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 209 8240
OBJECT_ID 210 8280
OBJECT_ID 211 8320
OBJECT_ID 212 8360
OBJECT_ID 213 8400
OBJECT_ID 214 8440
OBJECT_ID 215 8480
OBJECT_ID 216 8520
OBJECT_ID 217 8560
OBJECT_ID 218 8600
OBJECT_ID 219 8640
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 220 8680
OBJECT_ID 221 8720
OBJECT_ID 222 8760
OBJECT_ID 223 8800
OBJECT_ID 224 8840
OBJECT_ID 225 8880
OBJECT_ID 226 8920
OBJECT_ID 227 8960
OBJECT_ID 228 9000
OBJECT_ID 229 9040
OBJECT_ID 230 9080
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 231 9120
OBJECT_ID 232 9160
OBJECT_ID 233 9200
OBJECT_ID 234 9240
OBJECT_ID 235 9280
OBJECT_ID 236 9320
OBJECT_ID 237 9360
OBJECT_ID 238 9400
OBJECT_ID 239 9440
OBJECT_ID 240 9480
OBJECT_ID 241 9520
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 242 9690
OBJECT_ID 243 9730
OBJECT_ID 244 9770
OBJECT_ID 245 9810
OBJECT_ID 246 9850
OBJECT_ID 247 9890
OBJECT_ID 248 9930
OBJECT_ID 249 9970
OBJECT_ID 250 10010
OBJECT_ID 251 10050
OBJECT_ID 252 10090
COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
------------------------------ --------------- --------------
OBJECT_ID 253 10130
OBJECT_ID 254 10170
255 rows selected.
--从上可以看到,不存在popular value
SQL>
SQL> select count(distinct object_id) from t1;
COUNT(DISTINCTOBJECT_ID)
------------------------
10000
SQL> select count(1) from t1;
COUNT(1)
----------
10219
根据公式计算:
selectivity=newdensity * null_adjust
null_adjust=(num_rows-num_nulls)/num_rows
newdensity
=(buckets_total-buckets_all_popular_values)/buckets_total/(ndv-popular_value.count)
=(254-0)/254/(10000-0)
SQL> select (254-0)/254/(10000-0)*10219 from dual;
(254-0)/254/(10000-0)*10219
---------------------------
1.0219
--可以看到结果就是执行计划里面所看到的预估值1,与实际值存在较大的偏差;