前言:
本文主要介绍子查询in字句在不同数据量下的表的连接方式以及执行路径选择
测试表:
create table a (id number,owner varchar2(200),name varchar2(200));
create table b (id number,addrselect * from a where a.id in (select id from b)ess varchar2(200),card varchar2(200));
查询语句:
select * from a where a.id in (select id from b)
场景一:当A表为1000W,B表为100行
如果a表id的可选择率很高,则高效的执行计划为b,a表nest loop ,b表作为驱动表,a表做为被驱动表并且通过索引访问a表
---创建索引
create index test_ind1_a on a(id);
---插入数据
truncate table a;
truncate table b;
create or replace procedure p1 is
v_count number(10):=0;
begin
for i in 1..10000000 loop
insert into a values(v_count,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
v_count:=v_count+1;
if v_count>=10000 THEN
commit;
v_count:=0;
end if;
end loop;
commit;
end;
/
exec p1
begin
for i in 1..100 loop
insert into b values(i,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
end loop;
commit;
end;
/
---收集统计信息
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'a',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'b',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
---执行语句
set autotrace trace
select /*+use_nl(a) leading(B@TEMP) index(a test_ind1_a) */ * from a where a.id in (select /*+qb_name(temp) */ id from b);
set autotrace off
Execution Plan
----------------------------------------------------------
Plan hash value: 4263097039
--------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 99880 | 3316K| 50114 (1)| 00:00:02 |
| 1 | NESTED LOOPS | | 99880 | 3316K| 50114 (1)| 00:00:02 |
| 2 | NESTED LOOPS | | 99880 | 3316K| 50114 (1)| 00:00:02 |
| 3 | SORT UNIQUE | | 100 | 300 | 3 (0)| 00:00:01 |
| 4 | TABLE ACCESS FULL | B | 100 | 300 | 3 (0)| 00:00:01 |
|* 5 | INDEX RANGE SCAN | TEST_IND1_A | 999 | | 3 (0)| 00:00:01 |
| 6 | TABLE ACCESS BY INDEX ROWID| A | 999 | 30969 | 1002 (0)| 00:00:01 |
--------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
5 - access("A"."ID"="ID")
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
107234 consistent gets
0 physical reads
0 redo size
1774294 bytes sent via SQL*Net to client
73810 bytes received via SQL*Net from client
6668 SQL*Net roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
100000 rows processed
如果a表id的可选择率不高,则高效的执行计划为b,a表hash join semi ,b小表作为驱动表,a表做为被驱动表
---插入数据
truncate table a;
truncate table b;
create or replace procedure p1 is
v_count number(10):=0;
begin
for i in 1..10000000 loop
insert into a values(v_count,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
v_count:=v_count+1;
if v_count>=100 THEN
commit;
v_count:=0;
end if;
end loop;
commit;
end;
/
exec p1
begin
for i in 1..100 loop
insert into b values(i,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
end loop;
commit;
end;
/
---收集统计信息
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'a',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'b',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
---执行查询
set autotrace trace
select * from a where a.id in (select /*+hash_sj */ id from b);
set autotrace off
9900000 rows selected.
Elapsed: 00:01:51.35
Execution Plan
----------------------------------------------------------
Plan hash value: 2016728674
-----------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 9991K| 314M| 13324 (1)| 00:00:01 |
|* 1 | HASH JOIN RIGHT SEMI| | 9991K| 314M| 13324 (1)| 00:00:01 |
| 2 | TABLE ACCESS FULL | B | 100 | 300 | 3 (0)| 00:00:01 |
| 3 | TABLE ACCESS FULL | A | 9991K| 285M| 13294 (1)| 00:00:01 |
-----------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("A"."ID"="ID")
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
705207 consistent gets
48345 physical reads
0 redo size
205260414 bytes sent via SQL*Net to client
7260414 bytes received via SQL*Net from client
660001 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
9900000 rows processed
场景二:当A表为100行,B表为1000W
如果b表id的可选择率很高,高效的执行计划为a,b表nest loop semi,a表作为驱动表,b表做为被驱动表并且通过索引访问b表
---创建索引
create index test_ind1_b on b(id);
---插入数据
truncate table a;
truncate table b;
create or replace procedure p1 is
v_count number(10):=0;
begin
for i in 1..10000000 loop
insert into b values(v_count,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
v_count:=v_count+1;
if v_count>=10000 THEN
commit;
v_count:=0;
end if;
end loop;
commit;
end;
/
exec p1;
begin
for i in 1..100 loop
insert into a values(i,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
end loop;
commit;
end;
/
---收集统计信息
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'a',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'b',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
---执行语句
set autotrace trace
select * from a where a.id in (select /*+nl_sj index(b test_ind1_b) */ id from b);
set autotrace off
Execution Plan
----------------------------------------------------------
Plan hash value: 3690787731
----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 100 | 3400 | 203 (0)| 00:00:01 |
| 1 | NESTED LOOPS SEMI | | 100 | 3400 | 203 (0)| 00:00:01 |
| 2 | TABLE ACCESS FULL| A | 100 | 3000 | 3 (0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | TEST_IND1_B | 9995K| 38M| 2 (0)| 00:00:01 |
----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("A"."ID"="ID")
Note
-----
- this is an adaptive plan
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
221 consistent gets
0 physical reads
0 redo size
2731 bytes sent via SQL*Net to client
509 bytes received via SQL*Net from client
8 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
100 rows processed
如果b表id的可选择率不高,高效的执行计划为a,b 表hash join semi ,a表作为驱动表,b表做为被驱动表
---插入数据
truncate table a;
truncate table b;
create or replace procedure p1 is
v_count number(10):=0;
begin
for i in 1..10000000 loop
insert into b values(v_count,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
v_count:=v_count+1;
if v_count>=100 THEN
commit;
v_count:=0;
end if;
end loop;
commit;
end;
/
exec p1;
begin
for i in 1..100 loop
insert into a values(i,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
end loop;
commit;
end;
/
---执行统计信息收集
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'a',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'b',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
---执行查询
set autotrace trace
select * from a where a.id in (select /*+hash_sj */ id from b);
set autotrace off
Execution Plan
----------------------------------------------------------
Plan hash value: 3821265669
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 100 | 3300 | 13313 (1)| 00:00:01 |
|* 1 | HASH JOIN SEMI | | 100 | 3300 | 13313 (1)| 00:00:01 |
| 2 | TABLE ACCESS FULL| A | 100 | 3000 | 3 (0)| 00:00:01 |
| 3 | TABLE ACCESS FULL| B | 9996K| 28M| 13284 (1)| 00:00:01 |
---------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("A"."ID"="ID")
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
48368 consistent gets
48345 physical reads
0 redo size
2723 bytes sent via SQL*Net to client
491 bytes received via SQL*Net from client
8 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
99 rows processed
SQL>
SQL>
--
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("A"."ID"="ID")
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
1 consistent gets
0 physical reads
0 redo size
495 bytes sent via SQL*Net to client
436 bytes received via SQL*Net from client
1 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
0 rows processed
场景三:当A表为100W,B表为1000W
这时候高效的执行计划是a,b 表hash join semi,a表作为驱动表,b表做为被驱动表
---插入数据
truncate table a;
truncate table b;
create or replace procedure p1 is
v_count number(10):=0;
begin
for i in 1..10000000 loop
insert into b values(v_count,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
v_count:=v_count+1;
if v_count>=100 THEN
commit;
v_count:=0;
end if;
end loop;
commit;
end;
/
exec p1;
begin
for i in 1..1000000 loop
insert into a values(i,'aaaaaaaaa','bbbbbbbbbbbbbbbb');
end loop;
commit;
end;
/
---执行统计信息收集
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'a',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
exec DBMS_STATS.GATHER_TABLE_STATS(ownname=>'test',tabname=>'b',ESTIMATE_PERCENT=>30,method_opt=>'for all columns size auto',cascade=>true,force=>true,degree=>4);
---执行查询
set autotrace trace
select * from a where a.id in (select /*+hash_sj */ id from b);
set autotrace off
Execution Plan
----------------------------------------------------------
Plan hash value: 3821265669
-----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time |
-----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 101 | 3535 | | 23918 (1)| 00:00:01 |
|* 1 | HASH JOIN SEMI | | 101 | 3535 | 42M| 23918 (1)| 00:00:01 |
| 2 | TABLE ACCESS FULL| A | 1001K| 30M| | 1405 (1)| 00:00:01 |
| 3 | TABLE ACCESS FULL| B | 9996K| 28M| | 13284 (1)| 00:00:01 |
-----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("A"."ID"="ID")
Statistics
----------------------------------------------------------
85 recursive calls
0 db block gets
53496 consistent gets
63610 physical reads
0 redo size
2723 bytes sent via SQL*Net to client
492 bytes received via SQL*Net from client
8 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
99 rows processed