关闭

MSQL优化基础(height_blanced直方图选择率)

标签: height_blanced直方图选择率
261人阅读 评论(0) 收藏 举报
分类:

NDV>255, 并且数据分布不均衡,则倾向搜集 height_blanced直方图

 

准备数据

drop table t1;
create table t1(n1 number);
declare
min1 number;
max1 number;
begin
	for i in 1..600 loop
		min1 := trunc(dbms_random.value(1,200),0);
		max1 := trunc(dbms_random.value(min1,200),0);
		for j in min1..max1 loop
			insert into t1 values(i);
		end loop;
		commit;
	end loop;
	
	for i in 1..200 loop
		insert into t1 values(40);
		insert into t1 values(80);
	end loop;
end;
/

begin
	for i in 1..800 loop
		insert into t1 values(40);
		insert into t1 values(81);
	end loop;
	commit;
end;
/


begin
	for i in 1..200 loop
		insert into t1 values(39);
	end loop;
end;
/

exec dbms_stats.gather_table_stats(null,'T1',method_opt=>'for all columns size 254');

select num_rows from user_tables where table_name='T1';
  NUM_ROWS
----------
     31696  
select num_distinct,num_nulls, low_value,high_value,HISTOGRAM from user_tab_columns where table_name='T1'
NUM_DISTINCT  NUM_NULLS LOW_VALUE       HIGH_VALUE      HISTOGRAM
------------ ---------- --------------- --------------- ---------------
         536          0 C102            C207            HEIGHT BALANCED


 

 

案例1 : in-range =   var 经常出现的值

选择率 = (val所在的buckets数/bucket总数)*非空率

select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'
ENDPOINT_VALUE ENDPOINT_NUMBER    BUCKETS
-------------- --------------- ----------
...
 40              27                10
...


select * from t1 where n1 = 40;  --1129 rows selected.
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |  1248 |  3744 |    14   (8)| 00:00:01 |
|*  1 |  TABLE ACCESS FULL| T1   |  1248 |  3744 |    14   (8)| 00:00:01 |
--------------------------------------------------------------------------

选择率 = (val所在的buckets数/bucket总数)*非空率 = 10/254 =.039370079
cardinality = .039370079*31696 =1247.87402  

和rows一样


案例1 : in-range =   var 经常出现的值案例2 : in-range =   var 不出现的值

密度 = (bucket总数 - 所有的常见值的bucket数)/bucket总数/(NDV - 常见值的个数)

选择率 = 密度 * 非空率

select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'
ENDPOINT_VALUE ENDPOINT_NUMBER    BUCKETS
-------------- --------------- ----------
...
 40              27                10
...

594             250          1
597             252          2
599             253          1
...



select * from t1 where n1=598 (596,595 之类的 出来都是 157 )
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |       |       |    14 (100)|          |
|*  1 |  TABLE ACCESS FULL| T1   |   157 |   471 |    14   (8)| 00:00:01 |
--------------------------------------------------------------------------

select sum(buckets),count(*) from(
select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'
) where buckets > 1;
SUM(BUCKETS)   COUNT(*)
------------ ----------
          51         19

密度 =  (254 - 51)/254/(536 - 19) =.001545866
选择率=密度 *1 
cardinality=.001545866*31696=48.9977687   和 rows不一样。
测试环境 10.2 


 

 案例3 : in-range  <

选择率=(Count(Buckets) / Bucket总数)*非空率

select * from t1 where n1<256
--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      | 14475 | 43425 |    14   (8)| 00:00:01 |
|*  1 |  TABLE ACCESS FULL| T1   | 14475 | 43425 |    14   (8)| 00:00:01 |
--------------------------------------------------------------------------

select sum(buckets),count(*) from(
select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'
) where endpoint_value<=256;

SUM(BUCKETS)   COUNT(*)
------------ ----------
         116         96
		 
selectivity= (Count(Buckets) / Bucket总数)*非空率 =116/254 = .456692913
cardinality=.456692913*31696=14475.3386
和rows一样

 

0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:30227次
    • 积分:1468
    • 等级:
    • 排名:千里之外
    • 原创:122篇
    • 转载:0篇
    • 译文:0篇
    • 评论:1条
    最新评论