NDV>255, 并且数据分布不均衡,则倾向搜集 height_blanced直方图
准备数据
drop table t1;
create table t1(n1 number);
declare
min1 number;
max1 number;
begin
for i in 1..600 loop
min1 := trunc(dbms_random.value(1,200),0);
max1 := trunc(dbms_random.value(min1,200),0);
for j in min1..max1 loop
insert into t1 values(i);
end loop;
commit;
end loop;
for i in 1..200 loop
insert into t1 values(40);
insert into t1 values(80);
end loop;
end;
/
begin
for i in 1..800 loop
insert into t1 values(40);
insert into t1 values(81);
end loop;
commit;
end;
/
begin
for i in 1..200 loop
insert into t1 values(39);
end loop;
end;
/
exec dbms_stats.gather_table_stats(null,'T1',method_opt=>'for all columns size 254');
select num_rows from user_tables where table_name='T1';
NUM_ROWS
----------
31696
select num_distinct,num_nulls, low_value,high_value,HISTOGRAM from user_tab_columns where table_name='T1'
NUM_DISTINCT NUM_NULLS LOW_VALUE HIGH_VALUE HISTOGRAM
------------ ---------- --------------- --------------- ---------------
536 0 C102 C207 HEIGHT BALANCED
案例1 : in-range = var 经常出现的值
选择率 = (val所在的buckets数/bucket总数)*非空率
select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number)) buckets from user_histograms where table_name='T1'
ENDPOINT_VALUE ENDPOINT_NUMBER BUCKETS
-------------- --------------- ----------
...
40 27 10
...
select * from t1 where n1 = 40; --1129 rows selected.
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1248 | 3744 | 14 (8)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| T1 | 1248 | 3744 | 14 (8)| 00:00:01 |
--------------------------------------------------------------------------
选择率 = (val所在的buckets数/bucket总数)*非空率 = 10/254 =.039370079
cardinality = .039370079*31696 =1247.87402
和rows一样
案例1 : in-range = var 经常出现的值案例2 : in-range = var 不出现的值
密度 = (bucket总数 - 所有的常见值的bucket数)/bucket总数/(NDV - 常见值的个数)
选择率 = 密度 * 非空率
select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number)) buckets from user_histograms where table_name='T1'
ENDPOINT_VALUE ENDPOINT_NUMBER BUCKETS
-------------- --------------- ----------
...
40 27 10
...
594 250 1
597 252 2
599 253 1
...
select * from t1 where n1=598 (596,595 之类的 出来都是 157 )
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 14 (100)| |
|* 1 | TABLE ACCESS FULL| T1 | 157 | 471 | 14 (8)| 00:00:01 |
--------------------------------------------------------------------------
select sum(buckets),count(*) from(
select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number)) buckets from user_histograms where table_name='T1'
) where buckets > 1;
SUM(BUCKETS) COUNT(*)
------------ ----------
51 19
密度 = (254 - 51)/254/(536 - 19) =.001545866
选择率=密度 *1
cardinality=.001545866*31696=48.9977687 和 rows不一样。
测试环境 10.2
案例3 : in-range <
选择率=(Count(Buckets) / Bucket总数)*非空率
select * from t1 where n1<256
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 14475 | 43425 | 14 (8)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| T1 | 14475 | 43425 | 14 (8)| 00:00:01 |
--------------------------------------------------------------------------
select sum(buckets),count(*) from(
select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number)) buckets from user_histograms where table_name='T1'
) where endpoint_value<=256;
SUM(BUCKETS) COUNT(*)
------------ ----------
116 96
selectivity= (Count(Buckets) / Bucket总数)*非空率 =116/254 = .456692913
cardinality=.456692913*31696=14475.3386
和rows一样