create table test1
(
name varchar(10),
NN varchar(10)
);
insert into test1 values('test','A');
insert into test1 values('test1','B');
insert into test1 values('test1','C');
insert into test1 values('test1','D');
insert into test1 values('test1','E');
create table test2
(
name varchar(10),
NN varchar(10)
);
insert into test2 values('test','A');
insert into test2 values('test2','B');
insert into test2 values('test2','C');
insert into test2 values('test2','D');
insert into test2 values('test2','E');
1、交集:intersect
SQL> select * from test1 intersect select * from test2;
NAME NN
---------- ----------
test A
2、并集:union、union all (注意两者的区别)
SQL> select * from test1 union select * from test2;
NAME NN
---------- ----------
test A
test1 B
test1 C
test1 D
test1 E
test2 B
test2 C
test2 D
test2 E
9 rows selected.
SQL> select * from test1 union all select * from test2;
NAME NN
---------- ----------
test A
test1 B
test1 C
test1 D
test1 E
test A
test2 B
test2 C
test2 D
test2 E
10 rows selected.
3、差集:minus
SQL> select * from test1 minus select * from test2;
NAME NN
---------- ----------
test1 B
test1 C
test1 D
test1 E
SQL> select * from test2 minus select * from test1;
NAME NN
---------- ----------
test2 B
test2 C
test2 D
test2 E
最后对于求交集用intersect效率高呢还是hash join效率高呢?
SQL> select * from test1 intersect select * from test2;
Execution Plan
----------------------------------------------------------
Plan hash value: 4290880088
-----------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 5 | 140 | 8 (63)| 00:00:01 |
| 1 | INTERSECTION | | | | | |
| 2 | SORT UNIQUE | | 5 | 70 | 4 (25)| 00:00:01 |
| 3 | TABLE ACCESS FULL| TEST1 | 5 | 70 | 3 (0)| 00:00:01 |
| 4 | SORT UNIQUE | | 5 | 70 | 4 (25)| 00:00:01 |
| 5 | TABLE ACCESS FULL| TEST2 | 5 | 70 | 3 (0)| 00:00:01 |
-----------------------------------------------------------------------------
Note
-----
- dynamic sampling used for this statement (level=2)
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
14 consistent gets
0 physical reads
0 redo size
590 bytes sent via SQL*Net to client
523 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
2 sorts (memory)
0 sorts (disk)
1 rows processed
SQL> select a.* from test1 a,test2 b where a.name=b.name and a.nn=b.nn;
Execution Plan
----------------------------------------------------------
Plan hash value: 497311279
----------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 5 | 140 | 7 (15)| 00:00:01 |
|* 1 | HASH JOIN | | 5 | 140 | 7 (15)| 00:00:01 |
| 2 | TABLE ACCESS FULL| TEST1 | 5 | 70 | 3 (0)| 00:00:01 |
| 3 | TABLE ACCESS FULL| TEST2 | 5 | 70 | 3 (0)| 00:00:01 |
----------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("A"."NAME"="B"."NAME" AND "A"."NN"="B"."NN")
Note
-----
- dynamic sampling used for this statement (level=2)
Statistics
----------------------------------------------------------
0 recursive calls
0 db block gets
15 consistent gets
0 physical reads
0 redo size
590 bytes sent via SQL*Net to client
523 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
cost值intersect比hash join 高,但是逻辑读intersect比hash join的低。不知道大量数据的时候,会是什么样