-- 准备测试表及数据
-- Test fixtures: four tables loaded with the same nine rows (keys 1..9, col1 'a'..'i',
-- col2 'def'), differing only in their distribution style.
--
-- NOTE(review): despite its name, t1_even is created with DISTSTYLE KEY on id, not EVEN.
-- The name and definition are left as-is because the queries later in this file reference
-- t1_even; be aware of the mismatch when reading the join plans.
create table t1_even (id int, col1 varchar(100), col2 varchar(100)) diststyle key distkey (id);
create table t2_even (t1_id int, col1 varchar(100), col2 varchar(100)) diststyle even;
create table t3_key (t1_id int, col1 varchar(100), col2 varchar(100)) diststyle key distkey (t1_id);
create table t4_all (t1_id int, col1 varchar(100), col2 varchar(100)) diststyle all;

-- Column lists are spelled out so the inserts do not depend on column order.

-- t1_even: KEY-distributed on id (see note above).
insert into t1_even (id, col1, col2) values (1, 'a', 'def');
insert into t1_even (id, col1, col2) values (2, 'b', 'def');
insert into t1_even (id, col1, col2) values (3, 'c', 'def');
insert into t1_even (id, col1, col2) values (4, 'd', 'def');
insert into t1_even (id, col1, col2) values (5, 'e', 'def');
insert into t1_even (id, col1, col2) values (6, 'f', 'def');
insert into t1_even (id, col1, col2) values (7, 'g', 'def');
insert into t1_even (id, col1, col2) values (8, 'h', 'def');
insert into t1_even (id, col1, col2) values (9, 'i', 'def');

-- t2_even: EVEN-distributed.
insert into t2_even (t1_id, col1, col2) values (1, 'a', 'def');
insert into t2_even (t1_id, col1, col2) values (2, 'b', 'def');
insert into t2_even (t1_id, col1, col2) values (3, 'c', 'def');
insert into t2_even (t1_id, col1, col2) values (4, 'd', 'def');
insert into t2_even (t1_id, col1, col2) values (5, 'e', 'def');
insert into t2_even (t1_id, col1, col2) values (6, 'f', 'def');
insert into t2_even (t1_id, col1, col2) values (7, 'g', 'def');
insert into t2_even (t1_id, col1, col2) values (8, 'h', 'def');
insert into t2_even (t1_id, col1, col2) values (9, 'i', 'def');

-- t3_key: KEY-distributed on t1_id.
insert into t3_key (t1_id, col1, col2) values (1, 'a', 'def');
insert into t3_key (t1_id, col1, col2) values (2, 'b', 'def');
insert into t3_key (t1_id, col1, col2) values (3, 'c', 'def');
insert into t3_key (t1_id, col1, col2) values (4, 'd', 'def');
insert into t3_key (t1_id, col1, col2) values (5, 'e', 'def');
insert into t3_key (t1_id, col1, col2) values (6, 'f', 'def');
insert into t3_key (t1_id, col1, col2) values (7, 'g', 'def');
insert into t3_key (t1_id, col1, col2) values (8, 'h', 'def');
insert into t3_key (t1_id, col1, col2) values (9, 'i', 'def');

-- t4_all: ALL-distributed.
insert into t4_all (t1_id, col1, col2) values (1, 'a', 'def');
insert into t4_all (t1_id, col1, col2) values (2, 'b', 'def');
insert into t4_all (t1_id, col1, col2) values (3, 'c', 'def');
insert into t4_all (t1_id, col1, col2) values (4, 'd', 'def');
insert into t4_all (t1_id, col1, col2) values (5, 'e', 'def');
insert into t4_all (t1_id, col1, col2) values (6, 'f', 'def');
insert into t4_all (t1_id, col1, col2) values (7, 'g', 'def');
insert into t4_all (t1_id, col1, col2) values (8, 'h', 'def');
insert into t4_all (t1_id, col1, col2) values (9, 'i', 'def');
查看数据分布
testdb=# select name, slice, col, num_values as rows, minvalue, maxvalue
from svv_diskusage
where name in ('t1_even', 't2_even' ,'t3_key','t4_all') and col=0 and rows>0
order by name, slice, col;
name | slice | col | rows | minvalue | maxvalue
--------------+-------+-----+------+----------+----------
t1_even | 1 | 0 | 2 | 1 | 6
t1_even | 2 | 0 | 2 | 4 | 7
t1_even | 3 | 0 | 2 | 3 | 8
t1_even | 4 | 0 | 1 | 5 | 5
t1_even | 6 | 0 | 2 | 2 | 9
t2_even | 0 | 0 | 2 | 3 | 9
t2_even | 1 | 0 | 1 | 4 | 4
t2_even | 3 | 0 | 1 | 7 | 7
t2_even | 4 | 0 | 1 | 6 | 6
t2_even | 5 | 0 | 1 | 8 | 8
t2_even | 6 | 0 | 1 | 2 | 2
t2_even | 7 | 0 | 2 | 1 | 5
t3_key | 1 | 0 | 2 | 1 | 6
t3_key | 2 | 0 | 2 | 4 | 7
t3_key | 3 | 0 | 2 | 3 | 8
t3_key | 4 | 0 | 1 | 5 | 5
t3_key | 6 | 0 | 2 | 2 | 9
t4_all | 0 | 0 | 9 | 1 | 9
t4_all | 2 | 0 | 9 | 1 | 9
t4_all | 4 | 0 | 9 | 1 | 9
t4_all | 6 | 0 | 9 | 1 | 9
(21 rows)
单表查询
testdb=# explain select col1 from t1_even where id =5;
QUERY PLAN
----------------------------------------------------------
XN Seq Scan on t1_even (cost=0.00..0.11 rows=1 width=5)
Filter: (id = 5)
(2 rows)
testdb=# explain select col1 from t1_even where id in (1,3,5,7,9);
QUERY PLAN
----------------------------------------------------------------------
XN Seq Scan on t1_even (cost=0.00..0.20 rows=5 width=5)
Filter: ((id = 1) OR (id = 3) OR (id = 5) OR (id = 7) OR (id = 9))
(2 rows)
testdb=# explain select col1 from t3_key where t1_id = 5;
QUERY PLAN
---------------------------------------------------------
XN Seq Scan on t3_key (cost=0.00..0.11 rows=1 width=5)
Filter: (t1_id = 5)
(2 rows)
testdb=# explain select col1 from t4_all where t1_id in (1,3,5,7,9);
QUERY PLAN
-------------------------------------------------------------------------------------
XN Seq Scan on t4_all (cost=0.00..0.09 rows=5 width=5)
Filter: ((t1_id = 1) OR (t1_id = 3) OR (t1_id = 5) OR (t1_id = 7) OR (t1_id = 9))
(2 rows)
testdb=# explain select col1 from t4_all where t1_id = 5;
QUERY PLAN
---------------------------------------------------------
XN Seq Scan on t4_all (cost=0.00..0.01 rows=1 width=5)
Filter: (t1_id = 5)
(2 rows)
通过每条语句的 Cost 可以看出,当表的分配方式为 ALL 时,单表查询(全表扫描)所花费的 Cost 最少:例如同样是 t1_id = 5 的查询,t4_all 的 Cost 为 0.01,而 t3_key 为 0.11。
多表联接
-- t1 与 t2 联接
testdb=# select t1.id , t1.col1, t2.t1_id, t2.col1
testdb-# from t1_even t1 , t2_even t2 where t1.id = t2.t1_id;
id | col1 | t1_id | col1
----+------+-------+------
3 | c | 3 | c
8 | h | 8 | h
5 | e | 5 | e
1 | a | 1 | a
6 | f | 6 | f
2 | b | 2 | b
9 | i | 9 | i
4 | d | 4 | d
7 | g | 7 | g
(9 rows)
testdb=# explain select t1.id , t1.col1, t2.t1_id, t2.col1
from t1_even t1 , t2_even t2 where t1.id = t2.t1_id;
QUERY PLAN
---------------------------------------------------------------------------
XN Hash Join DS_DIST_INNER (cost=0.11..4050000.40 rows=9 width=344)
Inner Dist Key: t2.t1_id
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.09 rows=9 width=172)
-> XN Hash (cost=0.09..0.09 rows=9 width=172)
-> XN Seq Scan on t2_even t2 (cost=0.00..0.09 rows=9 width=172)
(6 rows)
testdb=# select t1.col1, t2.col1 from t1_even t1, t2_even t2 where t1.id = t2.t1_id and t1.id = 3;
col1 | col1
------+------
c | c
(1 row)
testdb=# explain select t1.col1, t2.col1 from t1_even t1, t2_even t2 where t1.id = t2.t1_id and t1.id = 3
;
QUERY PLAN
-------------------------------------------------------------------------
XN Hash Join DS_DIST_INNER (cost=0.12..50000.25 rows=1 width=10)
Inner Dist Key: t2.t1_id
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.11 rows=1 width=9)
Filter: (id = 3)
-> XN Hash (cost=0.11..0.11 rows=1 width=9)
-> XN Seq Scan on t2_even t2 (cost=0.00..0.11 rows=1 width=9)
Filter: (t1_id = 3)
(8 rows)
-- t1 与 t3 联接
testdb=# select t1.id , t1.col1, t3.t1_id, t3.col1
testdb-# from t1_even t1 , t3_key t3 where t1.id = t3.t1_id;
id | col1 | t1_id | col1
----+------+-------+------
5 | e | 5 | e
4 | d | 4 | d
7 | g | 7 | g
2 | b | 2 | b
9 | i | 9 | i
3 | c | 3 | c
8 | h | 8 | h
1 | a | 1 | a
6 | f | 6 | f
(9 rows)
testdb=# explain select t1.id , t1.col1, t3.t1_id, t3.col1
from t1_even t1 , t3_key t3 where t1.id = t3.t1_id;
QUERY PLAN
------------------------------------------------------------------------------
XN Hash Join DS_DIST_NONE (cost=0.11..0.40 rows=9 width=344)
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.09 rows=9 width=172)
-> XN Hash (cost=0.09..0.09 rows=9 width=172)
-> XN Seq Scan on t3_key t3 (cost=0.00..0.09 rows=9 width=172)
----- Tables missing statistics: t3_key -----
----- Update statistics by running the ANALYZE command on these tables -----
(7 rows)
testdb=# select t1.col1, t3.col1 from t1_even t1, t3_key t3 where t1.id = t3.t1_id and t1.id > 6;
col1 | col1
------+------
h | h
g | g
i | i
(3 rows)
testdb=# explain select t1.col1, t3.col1 from t1_even t1, t3_key t3 where t1.id = t3.t1_id and t1.id > 6;
QUERY PLAN
------------------------------------------------------------------------
XN Hash Join DS_DIST_NONE (cost=0.12..0.30 rows=2 width=10)
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.11 rows=4 width=9)
Filter: (id > 6)
-> XN Hash (cost=0.11..0.11 rows=4 width=9)
-> XN Seq Scan on t3_key t3 (cost=0.00..0.11 rows=4 width=9)
Filter: (t1_id > 6)
(7 rows)
-- t1 与 t4 联接
testdb=# select t1.id , t1.col1, t4.t1_id, t4.col1
from t1_even t1 , t4_all t4 where t1.id = t4.t1_id;
id | col1 | t1_id | col1
----+------+-------+------
1 | a | 1 | a
6 | f | 6 | f
3 | c | 3 | c
8 | h | 8 | h
5 | e | 5 | e
4 | d | 4 | d
7 | g | 7 | g
2 | b | 2 | b
9 | i | 9 | i
(9 rows)
testdb=# explain select t1.id , t1.col1, t4.t1_id, t4.col1
from t1_even t1 , t4_all t4 where t1.id = t4.t1_id;
QUERY PLAN
------------------------------------------------------------------------
XN Hash Join DS_DIST_ALL_NONE (cost=0.11..0.40 rows=9 width=181)
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.09 rows=9 width=172)
-> XN Hash (cost=0.09..0.09 rows=9 width=9)
-> XN Seq Scan on t4_all t4 (cost=0.00..0.09 rows=9 width=9)
(5 rows)
testdb=# select t1.id , t1.col1, t4.t1_id, t4.col1
from t1_even t1 , t4_all t4 where t1.id = t4.t1_id and t1.id = 5;
id | col1 | t1_id | col1
----+------+-------+------
5 | e | 5 | e
(1 row)
testdb=# explain select t1.id , t1.col1, t4.t1_id, t4.col1
from t1_even t1 , t4_all t4 where t1.id = t4.t1_id and t1.id = 5;
QUERY PLAN
------------------------------------------------------------------------
XN Hash Join DS_DIST_ALL_NONE (cost=0.01..0.15 rows=1 width=18)
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.11 rows=1 width=9)
Filter: (id = 5)
-> XN Hash (cost=0.01..0.01 rows=1 width=9)
-> XN Seq Scan on t4_all t4 (cost=0.00..0.01 rows=1 width=9)
Filter: (t1_id = 5)
(7 rows)
-- t1 与 t2, t3 联接
testdb=# select t1.col1, t2.col1, t3.col1 from t1_even t1, t2_even t2, t3_key t3 where t1.id = t2.t1_id and t1.id = t3.t1_id ;
col1 | col1 | col1
------+------+------
d | d | d
g | g | g
e | e | e
b | b | b
i | i | i
c | c | c
h | h | h
a | a | a
f | f | f
(9 rows)
testdb=# explain select t1.col1, t2.col1, t3.col1 from t1_even t1, t2_even t2, t3_key t3 where t1.id = t2.t1_id and t1.id = t3.t1_id ;
QUERY PLAN
------------------------------------------------------------------------------
XN Hash Join DS_DIST_INNER (cost=0.22..450000.72 rows=9 width=15)
Inner Dist Key: t2.t1_id
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Hash Join DS_DIST_NONE (cost=0.11..0.40 rows=9 width=18)
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.09 rows=9 width=9)
-> XN Hash (cost=0.09..0.09 rows=9 width=9)
-> XN Seq Scan on t3_key t3 (cost=0.00..0.09 rows=9 width=9)
-> XN Hash (cost=0.09..0.09 rows=9 width=9)
-> XN Seq Scan on t2_even t2 (cost=0.00..0.09 rows=9 width=9)
(10 rows)
testdb=# select t1.col1, t2.col1, t3.col1 from t1_even t1, t2_even t2, t3_key t3 where t1.id = t2.t1_id and t1.id = t3.t1_id and t1.id = 5;
col1 | col1 | col1
------+------+------
e | e | e
(1 row)
testdb=# explain select t1.col1, t2.col1, t3.col1 from t1_even t1, t2_even t2, t3_key t3 where t1.id = t2.t1_id and t1.id = t3.t1_id and t1.id = 5;
QUERY PLAN
------------------------------------------------------------------------------
XN Hash Join DS_DIST_INNER (cost=0.23..50000.39 rows=1 width=15)
Inner Dist Key: t2.t1_id
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Hash Join DS_DIST_NONE (cost=0.12..0.25 rows=1 width=18)
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.11 rows=1 width=9)
Filter: (id = 5)
-> XN Hash (cost=0.11..0.11 rows=1 width=9)
-> XN Seq Scan on t3_key t3 (cost=0.00..0.11 rows=1 width=9)
Filter: (t1_id = 5)
-> XN Hash (cost=0.11..0.11 rows=1 width=9)
-> XN Seq Scan on t2_even t2 (cost=0.00..0.11 rows=1 width=9)
Filter: (t1_id = 5)
(13 rows)
-- t1 与 t2, t4 联接
testdb=# select t1.col1, t2.col1, t4.col1 from t1_even t1, t2_even t2, t4_all t4 where t1.id = t2.t1_id and t1.id = t4.t1_id ;
col1 | col1 | col1
------+------+------
e | e | e
d | d | d
g | g | g
c | c | c
h | h | h
a | a | a
f | f | f
b | b | b
i | i | i
(9 rows)
testdb=# explain select t1.col1, t2.col1, t4.col1 from t1_even t1, t2_even t2, t4_all t4 where t1.id = t2.t1_id and t1.id = t4.t1_id ;
QUERY PLAN
------------------------------------------------------------------------------
XN Hash Join DS_DIST_INNER (cost=0.22..450000.72 rows=9 width=15)
Inner Dist Key: t2.t1_id
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Hash Join DS_DIST_ALL_NONE (cost=0.11..0.40 rows=9 width=18)
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.09 rows=9 width=9)
-> XN Hash (cost=0.09..0.09 rows=9 width=9)
-> XN Seq Scan on t4_all t4 (cost=0.00..0.09 rows=9 width=9)
-> XN Hash (cost=0.09..0.09 rows=9 width=9)
-> XN Seq Scan on t2_even t2 (cost=0.00..0.09 rows=9 width=9)
(10 rows)
testdb=# select t1.col1, t2.col1, t4.col1 from t1_even t1, t2_even t2, t4_all t4 where t1.id = t2.t1_id and t1.id = t4.t1_id and t1.id = 5;
col1 | col1 | col1
------+------+------
e | e | e
(1 row)
testdb=# explain select t1.col1, t2.col1, t4.col1 from t1_even t1, t2_even t2, t4_all t4 where t1.id = t2.t1_id and t1.id = t4.t1_id and t1.id = 5;
QUERY PLAN
------------------------------------------------------------------------------
XN Hash Join DS_DIST_INNER (cost=0.13..50000.29 rows=1 width=15)
Inner Dist Key: t2.t1_id
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Hash Join DS_DIST_ALL_NONE (cost=0.01..0.15 rows=1 width=18)
Hash Cond: ("outer".id = "inner".t1_id)
-> XN Seq Scan on t1_even t1 (cost=0.00..0.11 rows=1 width=9)
Filter: (id = 5)
-> XN Hash (cost=0.01..0.01 rows=1 width=9)
-> XN Seq Scan on t4_all t4 (cost=0.00..0.01 rows=1 width=9)
Filter: (t1_id = 5)
-> XN Hash (cost=0.11..0.11 rows=1 width=9)
-> XN Seq Scan on t2_even t2 (cost=0.00..0.11 rows=1 width=9)
Filter: (t1_id = 5)
(13 rows)
连接时的执行计划:
- 当联接一个分配方式为 ALL 的表时,Join 方式为 DS_DIST_ALL_NONE,表示“不需要重新分配,因为表的分配方式为 ALL,数据已经存在于每个节点”。
- 当被联接的表分配方式为 EVEN 时,Join 方式通常为 DS_DIST_INNER,表示“内部表被重新分配”。这是因为 EVEN 表的数据不是按联接键分布的,匹配的行可能位于不同的节点上,所以需要按联接键(如上面执行计划中的 Inner Dist Key: t2.t1_id)重新分配内部表。
- 在有些情况下,我们可以看到 DS_DIST_NONE,表示“没有表被重新分配,在没有在节点之间移动数据的情况下联接了相应的片”。例如 t1_even 与 t3_key 的联接:两张表都以联接列作为 distkey(分别为 id 和 t1_id)进行 KEY 分配,匹配的行已经位于同一个切片上。