继续讨论:使用列表时,选择率的计算
以上我们知道了如何处理column=constant的情况,比如包含列表的查询、包含空值列表的查询、包含两列的查询、指定范围的查询和棒定变量的查询。
首先从最简单的情况in-list开始
比如我们随便统计3个月(6,7,8)出生的人数
SQL select count(*) from audience where month_no in (6,7,8)
首先环境的搭建:
drop table audience;
begin
begin execute immediate 'purge recyclebin';
exception when others then null;
end;
begin execute immediate 'begin dbms_stats.delete_system_stats; end;';
exception when others then null;
end;
begin execute immediate 'alter session set "_optimizer_cost_model"=io';
exception when others then null;
end;
end;
/
create table audience as
select
trunc(dbms_random.value(1,13)) month_no
from
all_objects
where
rownum <= 1200
;
begin
dbms_stats.gather_table_stats(
user,
'audience',
cascade=>true,
estimate_percent => null,
method_opt =>'for all columns size 1'
);
end;
/
测试用例:
set autotrace traceonly explain
spool in_list
select count(*) from audience where month_no in (1,2);
select count(*) from audience where month_no in (1,2,3);
select count(*) from audience where month_no in (1,2,3,4);
select count(*) from audience where month_no in (1,2,3,4,5);
select count(*) from audience where month_no in (1,2,3,4,5,6);
select count(*) from audience where month_no in (1,2,3,4,5,6,7);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11,12);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11,12,13);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11,12,13,14);
select count(*) from audience where month_no in (
1, 2, 3, 4, 5, 6, 7, 8, 9,10,
11,12,13,14,15,16,17,18,19,20,
21,22,23,24,25,26,27,28,29,30
);
SQL> select count(*) from audience where month_no in (1,2);
执行计划
----------------------------------------------------------
Plan hash value: 3337892515
---------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost |
---------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 2 |
| 1 | SORT AGGREGATE | | 1 | 3 | |
|* 2 | TABLE ACCESS FULL| AUDIENCE | 200 | 600 | 2 |
---------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("MONTH_NO"=1 OR "MONTH_NO"=2)
Note
-----
- cpu costing is off (consider enabling it)
统计信息
----------------------------------------------------------
1 recursive calls
0 db block gets
4 consistent gets
0 physical reads
0 redo size
409 bytes sent via SQL*Net to client
385 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
SQL> select count(*) from audience where month_no in (1,2,3);
执行计划
----------------------------------------------------------
Plan hash value: 3337892515
---------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost |
---------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 2 |
| 1 | SORT AGGREGATE | | 1 | 3 | |
|* 2 | TABLE ACCESS FULL| AUDIENCE | 300 | 900 | 2 |
---------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("MONTH_NO"=1 OR "MONTH_NO"=2 OR "MONTH_NO"=3)
Note
-----
- cpu costing is off (consider enabling it)
统计信息
----------------------------------------------------------
1 recursive calls
0 db block gets
4 consistent gets
0 physical reads
0 redo size
409 bytes sent via SQL*Net to client
385 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
--------------------------------------
SQL> select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11,12);
执行计划
----------------------------------------------------------
Plan hash value: 3337892515
---------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost |
---------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 2 |
| 1 | SORT AGGREGATE | | 1 | 3 | |
|* 2 | TABLE ACCESS FULL| AUDIENCE | 1200 | 3600 | 2 |
---------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("MONTH_NO"=1 OR "MONTH_NO"=2 OR "MONTH_NO"=3 OR
"MONTH_NO"=4 OR "MONTH_NO"=5 OR "MONTH_NO"=6 OR "MONTH_NO"=7 OR
"MONTH_NO"=8 OR "MONTH_NO"=9 OR "MONTH_NO"=10 OR "MONTH_NO"=11 OR
"MONTH_NO"=12)
Note
-----
- cpu costing is off (consider enabling it)
统计信息
----------------------------------------------------------
1 recursive calls
0 db block gets
4 consistent gets
0 physical reads
0 redo size
408 bytes sent via SQL*Net to client
385 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
SQL> select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11,12,13,14);
执行计划
----------------------------------------------------------
Plan hash value: 3337892515
---------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost |
---------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 2 |
| 1 | SORT AGGREGATE | | 1 | 3 | |
|* 2 | TABLE ACCESS FULL| AUDIENCE | 1200 | 3600 | 2 |
---------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("MONTH_NO"=1 OR "MONTH_NO"=2 OR "MONTH_NO"=3 OR
"MONTH_NO"=4 OR "MONTH_NO"=5 OR "MONTH_NO"=6 OR "MONTH_NO"=7 OR
"MONTH_NO"=8 OR "MONTH_NO"=9 OR "MONTH_NO"=10 OR "MONTH_NO"=11 OR
"MONTH_NO"=12 OR "MONTH_NO"=13 OR "MONTH_NO"=14)
Note
-----
- cpu costing is off (consider enabling it)
统计信息
----------------------------------------------------------
1 recursive calls
0 db block gets
4 consistent gets
0 physical reads
0 redo size
408 bytes sent via SQL*Net to client
385 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
SQL> select count(*) from audience where month_no in (
2 1, 2, 3, 4, 5, 6, 7, 8, 9,10,
3 11,12,13,14,15,16,17,18,19,20,
4 21,22,23,24,25,26,27,28,29,30
5 );
执行计划
----------------------------------------------------------
Plan hash value: 3337892515
---------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost |
---------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 2 |
| 1 | SORT AGGREGATE | | 1 | 3 | |
|* 2 | TABLE ACCESS FULL| AUDIENCE | 1200 | 3600 | 2 |
---------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("MONTH_NO"=1 OR "MONTH_NO"=2 OR "MONTH_NO"=3 OR
"MONTH_NO"=4 OR "MONTH_NO"=5 OR "MONTH_NO"=6 OR "MONTH_NO"=7 OR
"MONTH_NO"=8 OR "MONTH_NO"=9 OR "MONTH_NO"=10 OR "MONTH_NO"=11 OR
"MONTH_NO"=12 OR "MONTH_NO"=13 OR "MONTH_NO"=14 OR "MONTH_NO"=15 OR
"MONTH_NO"=16 OR "MONTH_NO"=17 OR "MONTH_NO"=18 OR "MONTH_NO"=19 OR
"MONTH_NO"=20 OR "MONTH_NO"=21 OR "MONTH_NO"=22 OR "MONTH_NO"=23 OR
"MONTH_NO"=24 OR "MONTH_NO"=25 OR "MONTH_NO"=26 OR "MONTH_NO"=27 OR
"MONTH_NO"=28 OR "MONTH_NO"=29 OR "MONTH_NO"=30)
Note
-----
- cpu costing is off (consider enabling it)
统计信息
----------------------------------------------------------
1 recursive calls
0 db block gets
4 consistent gets
0 physical reads
0 redo size
408 bytes sent via SQL*Net to client
385 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
中间几个查询就不一一做了,
会的出如下一个结论:
列表大小 基数--12 基数1000
1 100 12
2 200 24
3 300 36
4 400 48
5
6
7
8
9 900 108
10 1000
11
12 1200 144
13 1200 156
14 1200 168
30 1200 360
当有12个值的时候两个结果的区别比有1000个值时的区别更为明显,在10G发现在列条目没有超过不同值的数目前:基数=N*列表中的条数目
有1000个不同值的脚本如下:有兴趣的可以测试下:
drop table audience;
begin
begin execute immediate 'purge recyclebin';
exception when others then null;
end;
begin execute immediate 'begin dbms_stats.delete_system_stats; end;';
exception when others then null;
end;
begin execute immediate 'alter session set "_optimizer_cost_model"=io';
exception when others then null;
end;
end;
/
/*
drop table generator;
create table generator as
select
rownum id
from all_objects
where rownum <= 1000
;
*/
create table audience as
with generator as (
select --+ materialize
rownum id
from all_objects
where rownum <= 5000
)
select
trunc(dbms_random.value(1,1001)) month_no
from
generator v1,
generator v2
where
rownum <= 12000
;
begin
dbms_stats.gather_table_stats(
user,
'audience',
cascade=>true,
estimate_percent => null,
method_opt =>'for all columns size 1'
);
end;
/
set autotrace traceonly explain
spool in_list_02
select count(*) from audience where month_no in (1,2);
select count(*) from audience where month_no in (1,2,3);
select count(*) from audience where month_no in (1,2,3,4);
select count(*) from audience where month_no in (1,2,3,4,5);
select count(*) from audience where month_no in (1,2,3,4,5,6);
select count(*) from audience where month_no in (1,2,3,4,5,6,7);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11,12);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11,12,13);
select count(*) from audience where month_no in (1,2,3,4,5,6,7,8,9,10,11,12,13,14);
select count(*) from audience where month_no in (
1, 2, 3, 4, 5, 6, 7, 8, 9,10,
11,12,13,14,15,16,17,18,19,20,
21,22,23,24,25,26,27,28,29,30
);
set autotrace off
spool off
接下来我们看以12个不同值的表为例,我们看一些奇怪现象:
where month_no = 25 ----- 超过high_value的情况
where month_no in (4, 4) ----重复的值
where month_no in (3, 25) ---输入输出 的混合集
where month_no in (3, 25, 26) ---同上
where month_no in (3, 25, 25, 26)--同上,但包含重复值
where month_no in (3, 25, null) ---优化器能否检测出NULL
where month_no in (:b1, :b2, :b3) ---包含还是不包含绑定变量
搭建环境脚本:
execute dbms_random.seed(0);
drop table audience;
begin
begin execute immediate 'purge recyclebin';
exception when others then null;
end;
begin execute immediate 'begin dbms_stats.delete_system_stats; end;';
exception when others then null;
end;
begin execute immediate 'alter session set "_optimizer_cost_model"=io';
exception when others then null;
end;
end;
/
create table audience as
select
trunc(dbms_random.value(1,13)) month_no
from
all_objects
where
rownum <= 1200
;
begin
dbms_stats.gather_table_stats(
user,
'audience',
cascade => true,
estimate_percent => null,
method_opt => 'for all columns size 1'
);
end;
/
rem
rem A little function to make is possible to call
rem the conversion routines in dbms_stats from an
rem SQL statement
rem
create or replace function value_to_number(i_raw in raw)
return number deterministic as
m_n number(6);
begin
dbms_stats.convert_raw_value(i_raw,m_n);
return m_n;
end;
.
/
variable b1 number;
variable b2 number;
variable b3 number;
set autotrace traceonly explain
spool oddities
select count(*) from audience
where month_no = 25
;
select count(*) from audience
where month_no in (4, 4)
;
select count(*) from audience
where month_no in (3, 25)
;
select count(*) from audience
where month_no in (3, 25, 26)
;
select count(*) from audience
where month_no in (3, 25, 25, 26)
;
select count(*) from audience
where month_no in (3, 25, null)
;
select count(*) from audience
where month_no in (:b1, :b2, :b3)
;
set autotrace off
spool off
结论是:
谓词 基数
where month_no = 25 ----- 超过high_value的情况 好 1,应该是10.2中改进的,10.1中还没,幸好测试了下
where month_no in (4, 4) ----重复的值 好 100
where month_no in (3, 25) ---输入输出 的混合集 好, 100
where month_no in (3, 25, 26) ---同上 好 101
where month_no in (3, 25, 25, 26)--同上,但包含重复值 好, 101
where month_no in (3, 25, null) ---优化器能否检测出NULL 好 200 应该是10.2中改进的,10.1中还没,幸好测试了下
where month_no in (:b1, :b2, :b3) ---包含还是不包含绑定变量 不好,但一致 300
幸好,挨个测试了下,10。2中改进了许多
明天继续
[本帖最后由 bosonmaster 于 2008-2-27 09:49 编辑]