MSQL优化基础(height_blanced直方图选择率)

NDV>255, 并且数据分布不均衡,则倾向搜集 height_blanced直方图

 

准备数据

drop table t1;
create table t1(n1 number);
declare
min1 number;
max1 number;
begin
	for i in 1..600 loop
		min1 := trunc(dbms_random.value(1,200),0);
		max1 := trunc(dbms_random.value(min1,200),0);
		for j in min1..max1 loop
			insert into t1 values(i);
		end loop;
		commit;
	end loop;
	
	for i in 1..200 loop
		insert into t1 values(40);
		insert into t1 values(80);
	end loop;
end;
/

begin
	for i in 1..800 loop
		insert into t1 values(40);
		insert into t1 values(81);
	end loop;
	commit;
end;
/


begin
	for i in 1..200 loop
		insert into t1 values(39);
	end loop;
end;
/

exec dbms_stats.gather_table_stats(null,'T1',method_opt=>'for all columns size 254');

select num_rows from user_tables where table_name='T1';
  NUM_ROWS
----------
     31696  
select num_distinct,num_nulls, low_value,high_value,HISTOGRAM from user_tab_columns where table_name='T1'
NUM_DISTINCT  NUM_NULLS LOW_VALUE       HIGH_VALUE      HISTOGRAM
------------ ---------- --------------- --------------- ---------------
         536          0 C102            C207            HEIGHT BALANCED


 

 

案例1 : in-range =   var 经常出现的值

选择率 = (val所在的buckets数/bucket总数)*非空率

select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'
ENDPOINT_VALUE ENDPOINT_NUMBER    BUCKETS
-------------- --------------- ----------
...
 40              27                10
...


select * from t1 where n1 = 40;  --1129 rows selected.
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |  1248 |  3744 |    14   (8)| 00:00:01 |
|*  1 |  TABLE ACCESS FULL| T1   |  1248 |  3744 |    14   (8)| 00:00:01 |
--------------------------------------------------------------------------

选择率 = (val所在的buckets数/bucket总数)*非空率 = 10/254 =.039370079
cardinality = .039370079*31696 =1247.87402  

和rows一样


案例1 : in-range =   var 经常出现的值案例2 : in-range =   var 不出现的值

密度 = (bucket总数 - 所有的常见值的bucket数)/bucket总数/(NDV - 常见值的个数)

选择率 = 密度 * 非空率

select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'
ENDPOINT_VALUE ENDPOINT_NUMBER    BUCKETS
-------------- --------------- ----------
...
 40              27                10
...

594             250          1
597             252          2
599             253          1
...



select * from t1 where n1=598 (596,595 之类的 出来都是 157 )
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |       |       |    14 (100)|          |
|*  1 |  TABLE ACCESS FULL| T1   |   157 |   471 |    14   (8)| 00:00:01 |
--------------------------------------------------------------------------

select sum(buckets),count(*) from(
select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'
) where buckets > 1;
SUM(BUCKETS)   COUNT(*)
------------ ----------
          51         19

密度 =  (254 - 51)/254/(536 - 19) =.001545866
选择率=密度 *1 
cardinality=.001545866*31696=48.9977687   和 rows不一样。
测试环境 10.2 


 

 案例3 : in-range  <

选择率=(Count(Buckets) / Bucket总数)*非空率

select * from t1 where n1<256
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      | 14475 | 43425 |    14   (8)| 00:00:01 |
|*  1 |  TABLE ACCESS FULL| T1   | 14475 | 43425 |    14   (8)| 00:00:01 |
--------------------------------------------------------------------------

select sum(buckets),count(*) from(
select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'
) where endpoint_value<=256;

SUM(BUCKETS)   COUNT(*)
------------ ----------
         116         96
		 
selectivity= (Count(Buckets) / Bucket总数)*非空率 =116/254 = .456692913
cardinality=.456692913*31696=14475.3386
和rows一样

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值