AWS Redshift - 不同分配方式表及多表联接的执行计划

-- 准备测试表及数据

-- Four test tables with the same three-column layout; they differ only in
-- how their rows are distributed.

-- NOTE(review): despite the "_even" suffix, this table is declared with KEY
-- distribution on id, not EVEN distribution.
create table t1_even (
    id   integer,
    col1 varchar(100),
    col2 varchar(100)
)
diststyle key
distkey (id);

-- EVEN distribution: no distribution key is declared.
create table t2_even (
    t1_id integer,
    col1  varchar(100),
    col2  varchar(100)
)
diststyle even;

-- KEY distribution on t1_id, the column later joined against t1_even.id.
create table t3_key (
    t1_id integer,
    col1  varchar(100),
    col2  varchar(100)
)
diststyle key
distkey (t1_id);

-- ALL distribution.
create table t4_all (
    t1_id integer,
    col1  varchar(100),
    col2  varchar(100)
)
diststyle all;

-- Load the same nine rows (keys 1..9, col1 'a'..'i', col2 'def') into each
-- test table. Column lists are spelled out so the statements do not depend on
-- column order, and each table is loaded with one multi-row insert instead of
-- nine single-row statements.

insert into t1_even (id, col1, col2) values
    (1, 'a', 'def'),
    (2, 'b', 'def'),
    (3, 'c', 'def'),
    (4, 'd', 'def'),
    (5, 'e', 'def'),
    (6, 'f', 'def'),
    (7, 'g', 'def'),
    (8, 'h', 'def'),
    (9, 'i', 'def');

insert into t2_even (t1_id, col1, col2) values
    (1, 'a', 'def'),
    (2, 'b', 'def'),
    (3, 'c', 'def'),
    (4, 'd', 'def'),
    (5, 'e', 'def'),
    (6, 'f', 'def'),
    (7, 'g', 'def'),
    (8, 'h', 'def'),
    (9, 'i', 'def');

insert into t3_key (t1_id, col1, col2) values
    (1, 'a', 'def'),
    (2, 'b', 'def'),
    (3, 'c', 'def'),
    (4, 'd', 'def'),
    (5, 'e', 'def'),
    (6, 'f', 'def'),
    (7, 'g', 'def'),
    (8, 'h', 'def'),
    (9, 'i', 'def');

insert into t4_all (t1_id, col1, col2) values
    (1, 'a', 'def'),
    (2, 'b', 'def'),
    (3, 'c', 'def'),
    (4, 'd', 'def'),
    (5, 'e', 'def'),
    (6, 'f', 'def'),
    (7, 'g', 'def'),
    (8, 'h', 'def'),
    (9, 'i', 'def');

查看数据分布

testdb=# select name, slice, col, num_values as rows, minvalue, maxvalue
from svv_diskusage
where name in ('t1_even', 't2_even' ,'t3_key','t4_all') and col=0 and rows>0
order by name, slice, col;
     name     | slice | col | rows | minvalue | maxvalue
--------------+-------+-----+------+----------+----------
 t1_even      |     1 |   0 |    2 |        1 |        6
 t1_even      |     2 |   0 |    2 |        4 |        7
 t1_even      |     3 |   0 |    2 |        3 |        8
 t1_even      |     4 |   0 |    1 |        5 |        5
 t1_even      |     6 |   0 |    2 |        2 |        9
 t2_even      |     0 |   0 |    2 |        3 |        9
 t2_even      |     1 |   0 |    1 |        4 |        4
 t2_even      |     3 |   0 |    1 |        7 |        7
 t2_even      |     4 |   0 |    1 |        6 |        6
 t2_even      |     5 |   0 |    1 |        8 |        8
 t2_even      |     6 |   0 |    1 |        2 |        2
 t2_even      |     7 |   0 |    2 |        1 |        5
 t3_key       |     1 |   0 |    2 |        1 |        6
 t3_key       |     2 |   0 |    2 |        4 |        7
 t3_key       |     3 |   0 |    2 |        3 |        8
 t3_key       |     4 |   0 |    1 |        5 |        5
 t3_key       |     6 |   0 |    2 |        2 |        9
 t4_all       |     0 |   0 |    9 |        1 |        9
 t4_all       |     2 |   0 |    9 |        1 |        9
 t4_all       |     4 |   0 |    9 |        1 |        9
 t4_all       |     6 |   0 |    9 |        1 |        9
(21 rows)

单表查询

testdb=# explain select col1 from t1_even where id =5;
                        QUERY PLAN
----------------------------------------------------------
 XN Seq Scan on t1_even  (cost=0.00..0.11 rows=1 width=5)
   Filter: (id = 5)
(2 rows)

testdb=# explain select col1 from t1_even where id in (1,3,5,7,9);
                              QUERY PLAN
----------------------------------------------------------------------
 XN Seq Scan on t1_even  (cost=0.00..0.20 rows=5 width=5)
   Filter: ((id = 1) OR (id = 3) OR (id = 5) OR (id = 7) OR (id = 9))
(2 rows)

testdb=# explain select col1 from t3_key where t1_id = 5;
                       QUERY PLAN
---------------------------------------------------------
 XN Seq Scan on t3_key  (cost=0.00..0.11 rows=1 width=5)
   Filter: (t1_id = 5)
(2 rows)

testdb=# explain select col1 from t4_all where t1_id in (1,3,5,7,9);
                                     QUERY PLAN
-------------------------------------------------------------------------------------
 XN Seq Scan on t4_all  (cost=0.00..0.09 rows=5 width=5)
   Filter: ((t1_id = 1) OR (t1_id = 3) OR (t1_id = 5) OR (t1_id = 7) OR (t1_id = 9))
(2 rows)

testdb=# explain select col1 from t4_all where t1_id = 5;
                       QUERY PLAN
---------------------------------------------------------
 XN Seq Scan on t4_all  (cost=0.00..0.01 rows=1 width=5)
   Filter: (t1_id = 5)
(2 rows)

对比各条语句的 Cost 可以看出:当表的分配方式为 ALL 时,全表扫描(Seq Scan)所花费的 Cost 最少。

多表联接

-- t1 与 t2 联接
testdb=# select t1.id , t1.col1, t2.t1_id, t2.col1
testdb-# from  t1_even t1 , t2_even t2 where t1.id = t2.t1_id;
 id | col1 | t1_id | col1
----+------+-------+------
  3 | c    |     3 | c
  8 | h    |     8 | h
  5 | e    |     5 | e
  1 | a    |     1 | a
  6 | f    |     6 | f
  2 | b    |     2 | b
  9 | i    |     9 | i
  4 | d    |     4 | d
  7 | g    |     7 | g
(9 rows)

testdb=# explain select t1.id , t1.col1, t2.t1_id, t2.col1
from  t1_even t1 , t2_even t2 where t1.id = t2.t1_id;
                                QUERY PLAN
---------------------------------------------------------------------------
 XN Hash Join DS_DIST_INNER  (cost=0.11..4050000.40 rows=9 width=344)
   Inner Dist Key: t2.t1_id
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Seq Scan on t1_even t1  (cost=0.00..0.09 rows=9 width=172)
   ->  XN Hash  (cost=0.09..0.09 rows=9 width=172)
         ->  XN Seq Scan on t2_even t2  (cost=0.00..0.09 rows=9 width=172)
(6 rows)


testdb=# select t1.col1, t2.col1 from t1_even t1, t2_even t2 where t1.id = t2.t1_id and t1.id = 3;
 col1 | col1
------+------
 c    | c
(1 row)

testdb=# explain select t1.col1, t2.col1 from t1_even t1, t2_even t2 where t1.id = t2.t1_id and t1.id = 3
;
                               QUERY PLAN
-------------------------------------------------------------------------
 XN Hash Join DS_DIST_INNER  (cost=0.12..50000.25 rows=1 width=10)
   Inner Dist Key: t2.t1_id
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Seq Scan on t1_even t1  (cost=0.00..0.11 rows=1 width=9)
         Filter: (id = 3)
   ->  XN Hash  (cost=0.11..0.11 rows=1 width=9)
         ->  XN Seq Scan on t2_even t2  (cost=0.00..0.11 rows=1 width=9)
               Filter: (t1_id = 3)
(8 rows)
-- t1 与 t3 联接
testdb=# select t1.id , t1.col1, t3.t1_id, t3.col1
testdb-# from  t1_even t1 , t3_key t3 where t1.id = t3.t1_id;
 id | col1 | t1_id | col1
----+------+-------+------
  5 | e    |     5 | e
  4 | d    |     4 | d
  7 | g    |     7 | g
  2 | b    |     2 | b
  9 | i    |     9 | i
  3 | c    |     3 | c
  8 | h    |     8 | h
  1 | a    |     1 | a
  6 | f    |     6 | f
(9 rows)

testdb=# explain select t1.id , t1.col1, t3.t1_id, t3.col1
from  t1_even t1 , t3_key t3 where t1.id = t3.t1_id;
                                  QUERY PLAN
------------------------------------------------------------------------------
 XN Hash Join DS_DIST_NONE  (cost=0.11..0.40 rows=9 width=344)
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Seq Scan on t1_even t1  (cost=0.00..0.09 rows=9 width=172)
   ->  XN Hash  (cost=0.09..0.09 rows=9 width=172)
         ->  XN Seq Scan on t3_key t3  (cost=0.00..0.09 rows=9 width=172)
 ----- Tables missing statistics: t3_key -----
 ----- Update statistics by running the ANALYZE command on these tables -----
(7 rows)




testdb=# select t1.col1, t3.col1 from t1_even t1, t3_key t3 where t1.id = t3.t1_id and t1.id > 6;
 col1 | col1
------+------
 h    | h
 g    | g
 i    | i
(3 rows)

testdb=# explain select t1.col1, t3.col1 from t1_even t1, t3_key t3 where t1.id = t3.t1_id and t1.id > 6;
                               QUERY PLAN
------------------------------------------------------------------------
 XN Hash Join DS_DIST_NONE  (cost=0.12..0.30 rows=2 width=10)
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Seq Scan on t1_even t1  (cost=0.00..0.11 rows=4 width=9)
         Filter: (id > 6)
   ->  XN Hash  (cost=0.11..0.11 rows=4 width=9)
         ->  XN Seq Scan on t3_key t3  (cost=0.00..0.11 rows=4 width=9)
               Filter: (t1_id > 6)
(7 rows)
-- t1 与 t4 联接
testdb=# select t1.id , t1.col1, t4.t1_id, t4.col1
from  t1_even t1 , t4_all t4 where t1.id = t4.t1_id;
 id | col1 | t1_id | col1
----+------+-------+------
  1 | a    |     1 | a
  6 | f    |     6 | f
  3 | c    |     3 | c
  8 | h    |     8 | h
  5 | e    |     5 | e
  4 | d    |     4 | d
  7 | g    |     7 | g
  2 | b    |     2 | b
  9 | i    |     9 | i
(9 rows)

testdb=# explain select t1.id , t1.col1, t4.t1_id, t4.col1
from  t1_even t1 , t4_all t4 where t1.id = t4.t1_id;
                               QUERY PLAN
------------------------------------------------------------------------
 XN Hash Join DS_DIST_ALL_NONE  (cost=0.11..0.40 rows=9 width=181)
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Seq Scan on t1_even t1  (cost=0.00..0.09 rows=9 width=172)
   ->  XN Hash  (cost=0.09..0.09 rows=9 width=9)
         ->  XN Seq Scan on t4_all t4  (cost=0.00..0.09 rows=9 width=9)
(5 rows)


testdb=# select t1.id , t1.col1, t4.t1_id, t4.col1
from  t1_even t1 , t4_all t4 where t1.id = t4.t1_id and t1.id = 5;
 id | col1 | t1_id | col1
----+------+-------+------
  5 | e    |     5 | e
(1 row)

testdb=# explain select t1.id , t1.col1, t4.t1_id, t4.col1
from  t1_even t1 , t4_all t4 where t1.id = t4.t1_id and t1.id = 5;
                               QUERY PLAN
------------------------------------------------------------------------
 XN Hash Join DS_DIST_ALL_NONE  (cost=0.01..0.15 rows=1 width=18)
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Seq Scan on t1_even t1  (cost=0.00..0.11 rows=1 width=9)
         Filter: (id = 5)
   ->  XN Hash  (cost=0.01..0.01 rows=1 width=9)
         ->  XN Seq Scan on t4_all t4  (cost=0.00..0.01 rows=1 width=9)
               Filter: (t1_id = 5)
(7 rows)
-- t1 与 t2, t3 联接
testdb=# select t1.col1, t2.col1, t3.col1 from t1_even t1, t2_even t2, t3_key t3 where t1.id = t2.t1_id and t1.id = t3.t1_id ;
 col1 | col1 | col1
------+------+------
 d    | d    | d
 g    | g    | g
 e    | e    | e
 b    | b    | b
 i    | i    | i
 c    | c    | c
 h    | h    | h
 a    | a    | a
 f    | f    | f
(9 rows)

testdb=# explain select t1.col1, t2.col1, t3.col1 from t1_even t1, t2_even t2, t3_key t3 where t1.id = t2.t1_id and t1.id = t3.t1_id ;
                                  QUERY PLAN
------------------------------------------------------------------------------
 XN Hash Join DS_DIST_INNER  (cost=0.22..450000.72 rows=9 width=15)
   Inner Dist Key: t2.t1_id
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Hash Join DS_DIST_NONE  (cost=0.11..0.40 rows=9 width=18)
         Hash Cond: ("outer".id = "inner".t1_id)
         ->  XN Seq Scan on t1_even t1  (cost=0.00..0.09 rows=9 width=9)
         ->  XN Hash  (cost=0.09..0.09 rows=9 width=9)
               ->  XN Seq Scan on t3_key t3  (cost=0.00..0.09 rows=9 width=9)
   ->  XN Hash  (cost=0.09..0.09 rows=9 width=9)
         ->  XN Seq Scan on t2_even t2  (cost=0.00..0.09 rows=9 width=9)
(10 rows)

testdb=# select t1.col1, t2.col1, t3.col1 from t1_even t1, t2_even t2, t3_key t3 where t1.id = t2.t1_id and t1.id = t3.t1_id and t1.id = 5;
 col1 | col1 | col1
------+------+------
 e    | e    | e
(1 row)

testdb=# explain select t1.col1, t2.col1, t3.col1 from t1_even t1, t2_even t2, t3_key t3 where t1.id = t2.t1_id and t1.id = t3.t1_id and t1.id = 5;
                                  QUERY PLAN
------------------------------------------------------------------------------
 XN Hash Join DS_DIST_INNER  (cost=0.23..50000.39 rows=1 width=15)
   Inner Dist Key: t2.t1_id
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Hash Join DS_DIST_NONE  (cost=0.12..0.25 rows=1 width=18)
         Hash Cond: ("outer".id = "inner".t1_id)
         ->  XN Seq Scan on t1_even t1  (cost=0.00..0.11 rows=1 width=9)
               Filter: (id = 5)
         ->  XN Hash  (cost=0.11..0.11 rows=1 width=9)
               ->  XN Seq Scan on t3_key t3  (cost=0.00..0.11 rows=1 width=9)
                     Filter: (t1_id = 5)
   ->  XN Hash  (cost=0.11..0.11 rows=1 width=9)
         ->  XN Seq Scan on t2_even t2  (cost=0.00..0.11 rows=1 width=9)
               Filter: (t1_id = 5)
(13 rows)
-- t1 与 t2, t4 联接

testdb=# select t1.col1, t2.col1, t4.col1 from t1_even t1, t2_even t2, t4_all t4 where t1.id = t2.t1_id and t1.id = t4.t1_id ;
 col1 | col1 | col1
------+------+------
 e    | e    | e
 d    | d    | d
 g    | g    | g
 c    | c    | c
 h    | h    | h
 a    | a    | a
 f    | f    | f
 b    | b    | b
 i    | i    | i
(9 rows)

testdb=# explain select t1.col1, t2.col1, t4.col1 from t1_even t1, t2_even t2, t4_all t4 where t1.id = t2.t1_id and t1.id = t4.t1_id ;
                                  QUERY PLAN
------------------------------------------------------------------------------
 XN Hash Join DS_DIST_INNER  (cost=0.22..450000.72 rows=9 width=15)
   Inner Dist Key: t2.t1_id
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Hash Join DS_DIST_ALL_NONE  (cost=0.11..0.40 rows=9 width=18)
         Hash Cond: ("outer".id = "inner".t1_id)
         ->  XN Seq Scan on t1_even t1  (cost=0.00..0.09 rows=9 width=9)
         ->  XN Hash  (cost=0.09..0.09 rows=9 width=9)
               ->  XN Seq Scan on t4_all t4  (cost=0.00..0.09 rows=9 width=9)
   ->  XN Hash  (cost=0.09..0.09 rows=9 width=9)
         ->  XN Seq Scan on t2_even t2  (cost=0.00..0.09 rows=9 width=9)
(10 rows)

testdb=# select t1.col1, t2.col1, t4.col1 from t1_even t1, t2_even t2, t4_all t4 where t1.id = t2.t1_id and t1.id = t4.t1_id and t1.id = 5;
 col1 | col1 | col1
------+------+------
 e    | e    | e
(1 row)

testdb=# explain select t1.col1, t2.col1, t4.col1 from t1_even t1, t2_even t2, t4_all t4 where t1.id = t2.t1_id and t1.id = t4.t1_id and t1.id = 5;
                                  QUERY PLAN
------------------------------------------------------------------------------
 XN Hash Join DS_DIST_INNER  (cost=0.13..50000.29 rows=1 width=15)
   Inner Dist Key: t2.t1_id
   Hash Cond: ("outer".id = "inner".t1_id)
   ->  XN Hash Join DS_DIST_ALL_NONE  (cost=0.01..0.15 rows=1 width=18)
         Hash Cond: ("outer".id = "inner".t1_id)
         ->  XN Seq Scan on t1_even t1  (cost=0.00..0.11 rows=1 width=9)
               Filter: (id = 5)
         ->  XN Hash  (cost=0.01..0.01 rows=1 width=9)
               ->  XN Seq Scan on t4_all t4  (cost=0.00..0.01 rows=1 width=9)
                     Filter: (t1_id = 5)
   ->  XN Hash  (cost=0.11..0.11 rows=1 width=9)
         ->  XN Seq Scan on t2_even t2  (cost=0.00..0.11 rows=1 width=9)
               Filter: (t1_id = 5)
(13 rows)

联接时的执行计划:

  1. 当联接一个分配方式为 ALL 的表时,Join 方式为 DS_DIST_ALL_NONE,表示“不需要重新分配,因为表的分配方式为 ALL,每个节点上都已有一份完整的数据”。
  2. 当联接的内部表分配方式为 EVEN 时(如 t2_even),Join 方式大多为 DS_DIST_INNER,表示“内部表被重新分配”。这是因为 EVEN 表的数据没有按联接键分布,联接键相同的行可能位于不同的节点上。
  3. 在有些情况下,我们可以看到 DS_DIST_NONE,表示“没有表被重新分配,无需在节点之间移动数据即可联接相应的切片”。例如 t1_even 与 t3_key 都以联接列作为 DISTKEY(t1_even 虽名为 even,建表时实际使用的是 diststyle key),因此联接键相同的行已位于同一切片上。
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值