测试多列DK的选择性问题

为了测试多个DK分布键情况下,多表关联是否可以镜像segment过滤,测试如下:
[gpadmin@gtlions50 ~]$ psql gtlions
psql (8.2.15)
Type "help" for help.


gtlions=# create table gtt1(id int,name character varying(5)) distributed by (id,name);
CREATE TABLE
gtlions=# create table gtt2(id int,name character varying(5)) distributed by (id,name);
CREATE TABLE
gtlions=# insert into gtt1 values(1,'a');
INSERT 0 1
gtlions=# insert into gtt1 values(11,'b');
INSERT 0 1
gtlions=# select gp_segment_id,* from gtt1 order by 1;
 gp_segment_id | id | name 
---------------+----+------
             2 | 11 | b
             3 |  1 | a
(2 rows)


gtlions=# insert into gtt1 values(111,'c');
INSERT 0 1
gtlions=# insert into gtt1 values(1111,'d');
INSERT 0 1
gtlions=# select gp_segment_id,* from gtt1 order by 1;
 gp_segment_id |  id  | name 
---------------+------+------
             0 | 1111 | d
             2 |   11 | b
             3 |  111 | c
             3 |    1 | a
(4 rows)


gtlions=# insert into gtt2 select * from gtt1;
INSERT 0 4
gtlions=# explain analyze select * from gtt1 where id=11;
                                               QUERY PLAN                                               
--------------------------------------------------------------------------------------------------------
 Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..1.01 rows=1 width=6)
   Rows out:  1 rows at destination with 3.593 ms to first row, 4.067 ms to end, start offset by 28 ms.
   ->  Seq Scan on gtt1  (cost=0.00..1.01 rows=1 width=6)
         Filter: id = 11
         Rows out:  1 rows (seg2) with 0.159 ms to first row, 0.160 ms to end, start offset by 359 ms.
 Slice statistics:
   (slice0)    Executor memory: 139K bytes.
   (slice1)    Executor memory: 157K bytes avg x 4 workers, 157K bytes max (seg0).
 Statement statistics:
   Memory used: 128000K bytes
 Total runtime: 32.320 ms
(11 rows)


gtlions=#  explain analyze select * from gtt1 where name='b';
                                               QUERY PLAN                                               
--------------------------------------------------------------------------------------------------------
 Gather Motion 4:1  (slice1; segments: 4)  (cost=0.00..1.01 rows=1 width=6)
   Rows out:  1 rows at destination with 3.537 ms to first row, 3.538 ms to end, start offset by 31 ms.
   ->  Seq Scan on gtt1  (cost=0.00..1.01 rows=1 width=6)
         Filter: name::text = 'b'::text
         Rows out:  1 rows (seg2) with 0.231 ms to first row, 0.233 ms to end, start offset by -250 ms.
 Slice statistics:
   (slice0)    Executor memory: 139K bytes.
   (slice1)    Executor memory: 157K bytes avg x 4 workers, 157K bytes max (seg0).
 Statement statistics:
   Memory used: 128000K bytes
 Total runtime: 34.830 ms
(11 rows)


gtlions=# explain analyze select * from gtt1 where id=11 and name='b';
                                                QUERY PLAN                                                 
-----------------------------------------------------------------------------------------------------------
 Gather Motion 1:1  (slice1; segments: 1)  (cost=0.00..1.01 rows=1 width=6)
   Rows out:  1 rows at destination with 2.032 ms to first row, 2.033 ms to end, start offset by 0.500 ms.
   ->  Seq Scan on gtt1  (cost=0.00..1.01 rows=1 width=6)
         Filter: id = 11 AND name::text = 'b'::text
         Rows out:  1 rows with 0.062 ms to first row, 0.065 ms to end, start offset by 328 ms.
 Slice statistics:
   (slice0)    Executor memory: 139K bytes.
   (slice1)    Executor memory: 131K bytes (seg2).
 Statement statistics:
   Memory used: 128000K bytes
 Total runtime: 2.987 ms
(11 rows)


gtlions=# explain analyze select * from gtt1 t1,gtt2 t2 where t1.id=t2.id;
                                                                            QUERY PLAN                                                                       
     
-------------------------------------------------------------------------------------------------------------------------------------------------------------
-----
 Gather Motion 4:1  (slice2; segments: 4)  (cost=1.11..4.20 rows=1 width=12)
   Rows out:  4 rows at destination with 27 ms to first row, 29 ms to end, start offset by 54 ms.
   ->  Hash Join  (cost=1.11..4.20 rows=1 width=12)
         Hash Cond: t2.id = t1.id
         Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 18 ms to first row, 21 ms to end, start offset by 367 ms.
         Executor memory:  1K bytes avg, 1K bytes max (seg0).
         Work_mem used:  1K bytes avg, 1K bytes max (seg0). Workfile: (0 spilling, 0 reused)
         (seg3)   Hash chain length 1.0 avg, 1 max, using 4 of 524341 buckets.
         ->  Seq Scan on gtt2 t2  (cost=0.00..3.04 rows=1 width=6)
               Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 0.219 ms to first row, 0.220 ms to end, start offset by 385 ms.
         ->  Hash  (cost=1.06..1.06 rows=1 width=6)
               Rows in:  Avg 4.0 rows x 4 workers.  Max 4 rows (seg0) with 0.197 ms to end, start offset by -607 ms.
               ->  Broadcast Motion 4:4  (slice1; segments: 4)  (cost=0.00..1.06 rows=1 width=6)
                     Rows out:  Avg 4.0 rows x 4 workers at destination.  Max 4 rows (seg0) with 0.107 ms to first row, 0.118 ms to end, start offset by -607
 ms.
                     ->  Seq Scan on gtt1 t1  (cost=0.00..1.01 rows=1 width=6)
                           Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 3.709 ms to first row, 3.711 ms to end, start offset by 369 ms.
 Slice statistics:
   (slice0)    Executor memory: 184K bytes.
   (slice1)    Executor memory: 212K bytes avg x 4 workers, 216K bytes max (seg0).
   (slice2)    Executor memory: 8401K bytes avg x 4 workers, 8401K bytes max (seg0).  Work_mem: 1K bytes max.
 Statement statistics:
   Memory used: 128000K bytes
 Total runtime: 84.302 ms
(23 rows)


gtlions=#  explain analyze select * from gtt1 t1,gtt2 t2 where t1.name=t2.name;
                                                                            QUERY PLAN                                                                       
      
-------------------------------------------------------------------------------------------------------------------------------------------------------------
------
 Gather Motion 4:1  (slice2; segments: 4)  (cost=1.11..4.20 rows=1 width=12)
   Rows out:  4 rows at destination with 24 ms to first row, 32 ms to end, start offset by 50 ms.
   ->  Hash Join  (cost=1.11..4.20 rows=1 width=12)
         Hash Cond: t2.name::text = t1.name::text
         Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 10 ms to first row, 13 ms to end, start offset by -253 ms.
         Executor memory:  1K bytes avg, 1K bytes max (seg0).
         Work_mem used:  1K bytes avg, 1K bytes max (seg0). Workfile: (0 spilling, 0 reused)
         (seg3)   Hash chain length 1.0 avg, 1 max, using 4 of 524341 buckets.
         ->  Seq Scan on gtt2 t2  (cost=0.00..3.04 rows=1 width=6)
               Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 0.187 ms to first row, 0.189 ms to end, start offset by -243 ms.
         ->  Hash  (cost=1.06..1.06 rows=1 width=6)
               Rows in:  Avg 4.0 rows x 4 workers.  Max 4 rows (seg0) with 3.477 ms to end, start offset by -1556 ms.
               ->  Broadcast Motion 4:4  (slice1; segments: 4)  (cost=0.00..1.06 rows=1 width=6)
                     Rows out:  Avg 4.0 rows x 4 workers at destination.  Max 4 rows (seg0) with 0.161 ms to first row, 0.177 ms to end, start offset by -155
6 ms.
                     ->  Seq Scan on gtt1 t1  (cost=0.00..1.01 rows=1 width=6)
                           Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 0.143 ms to first row, 0.146 ms to end, start offset by -260 ms.
 Slice statistics:
   (slice0)    Executor memory: 184K bytes.
   (slice1)    Executor memory: 212K bytes avg x 4 workers, 216K bytes max (seg0).
   (slice2)    Executor memory: 8401K bytes avg x 4 workers, 8401K bytes max (seg0).  Work_mem: 1K bytes max.
 Statement statistics:
   Memory used: 128000K bytes
 Total runtime: 82.710 ms
(23 rows)


gtlions=# explain analyze select * from gtt1 t1,gtt2 t2 where t1.id=t2.id and t1.name=t2.name;
                                                                    QUERY PLAN                                                                    
--------------------------------------------------------------------------------------------------------------------------------------------------
 Gather Motion 4:1  (slice1; segments: 4)  (cost=1.02..4.12 rows=1 width=12)
   Rows out:  4 rows at destination with 15 ms to first row, 27 ms to end, start offset by 29 ms.
   ->  Hash Join  (cost=1.02..4.12 rows=1 width=12)
         Hash Cond: t2.id = t1.id AND t2.name::text = t1.name::text
         Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 16 ms to first row, 23 ms to end, start offset by 344 ms.
         Executor memory:  1K bytes avg, 1K bytes max (seg3).
         Work_mem used:  1K bytes avg, 1K bytes max (seg3). Workfile: (0 spilling, 0 reused)
         (seg0)   Hash chain length 1.0 avg, 1 max, using 1 of 524341 buckets.
         (seg3)   Hash chain length 1.0 avg, 1 max, using 2 of 524341 buckets.
         ->  Seq Scan on gtt2 t2  (cost=0.00..3.04 rows=1 width=6)
               Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 0.186 ms to first row, 0.188 ms to end, start offset by 344 ms.
         ->  Hash  (cost=1.01..1.01 rows=1 width=6)
               Rows in:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 0.706 ms to end, start offset by 359 ms.
               ->  Seq Scan on gtt1 t1  (cost=0.00..1.01 rows=1 width=6)
                     Rows out:  Avg 1.3 rows x 3 workers.  Max 2 rows (seg3) with 0.493 ms to first row, 0.499 ms to end, start offset by 359 ms.
 Slice statistics:
   (slice0)    Executor memory: 184K bytes.
   (slice1)    Executor memory: 6333K bytes avg x 4 workers, 8385K bytes max (seg0).  Work_mem: 1K bytes max.
 Statement statistics:
   Memory used: 128000K bytes
 Total runtime: 56.840 ms
(21 rows)
gtlions=# explain analyze select * from gtt1 t1,gtt2 t2 where t1.id=t2.id and t1.name=t2.name and t1.id=11 and t1.name='b';
                                              QUERY PLAN                                               
-------------------------------------------------------------------------------------------------------
 Gather Motion 1:1  (slice1; segments: 1)  (cost=0.00..4.11 rows=4 width=12)
   Rows out:  1 rows at destination with 1.874 ms to end, start offset by 29 ms.
   ->  Nested Loop  (cost=0.00..4.11 rows=1 width=12)
         Rows out:  1 rows with 0.298 ms to first row, 0.305 ms to end, start offset by -313 ms.
         ->  Seq Scan on gtt1 t1  (cost=0.00..1.02 rows=1 width=6)
               Filter: id = 11 AND name::text = 'b'::text AND 'b'::text = name::text AND 11 = id
               Rows out:  1 rows with 0.187 ms to first row, 0.189 ms to end, start offset by -313 ms.
         ->  Seq Scan on gtt2 t2  (cost=0.00..3.08 rows=1 width=6)
               Filter: 'b'::text = name::text AND 11 = id AND name::text = 'b'::text AND id = 11
               Rows out:  1 rows with 0.037 ms to first row, 0.039 ms to end, start offset by -312 ms.
 Slice statistics:
   (slice0)    Executor memory: 184K bytes.
   (slice1)    Executor memory: 202K bytes (seg2).
 Statement statistics:
   Memory used: 128000K bytes
 Total runtime: 31.186 ms
(16 rows)


gtlions=# explain analyze select * from gtt1 t1,gtt2 t2 where t1.id=t2.id and t1.name=t2.name and t1.id=11 and t1.name='b' and t2.id=11 and t2.name='b';
                                               QUERY PLAN                                               
--------------------------------------------------------------------------------------------------------
 Gather Motion 1:1  (slice1; segments: 1)  (cost=0.00..4.08 rows=4 width=12)
   Rows out:  1 rows at destination with 2.433 ms to first row, 2.434 ms to end, start offset by 32 ms.
   ->  Nested Loop  (cost=0.00..4.08 rows=1 width=12)
         Rows out:  1 rows with 0.270 ms to first row, 0.277 ms to end, start offset by -315 ms.
         ->  Seq Scan on gtt1 t1  (cost=0.00..1.01 rows=1 width=6)
               Filter: id = 11 AND name::text = 'b'::text
               Rows out:  1 rows with 0.183 ms to first row, 0.184 ms to end, start offset by -315 ms.
         ->  Seq Scan on gtt2 t2  (cost=0.00..3.06 rows=1 width=6)
               Filter: id = 11 AND name::text = 'b'::text
               Rows out:  1 rows with 0.017 ms to first row, 0.019 ms to end, start offset by -315 ms.
 Slice statistics:
   (slice0)    Executor memory: 184K bytes.
   (slice1)    Executor memory: 176K bytes (seg2).
 Statement statistics:
   Memory used: 128000K bytes
 Total runtime: 35.278 ms
(16 rows)
从测试结果来看:
0. 单表查询,使用其中一个dk列作为查询条件,Gather Motion步骤是没办法过滤segment的;
1. 单表查询,使用dk的所有列,Gather Motion步骤可以选择只有数据存在的segment的;
2. 两表关联查询,使用其中一个dk列作为查询条件,需要执行Broadcast Motion动作进行数据重新分布;
3. 两表关联查询,使用dk的所有列,可以避免Broadcast Motion动作;
-EOF-
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值