随机分布
testdb=# create table test01 (id int, col1 varchar(100)) distributed RANDOMLY;
CREATE TABLE
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# select gp_segment_id,count(*) from test01 group by gp_segment_id;
gp_segment_id | count
---------------+-------
1 | 1
6 | 4
3 | 1
8 | 1
0 | 2
5 | 1
(6 rows)
根据键值分布
testdb=# create table test02 (id int, col1 varchar(100)) distributed by (id);
CREATE TABLE
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# insert into test02 values (1,'a');
INSERT 0 1
testdb=# select gp_segment_id,count(*) from test02 group by gp_segment_id;
gp_segment_id | count
---------------+-------
4 | 11
(1 row)
如果有主键,就会根据主键分布
testdb=# create table test03(id int primary key, col1 varchar(100));
CREATE TABLE
testdb=# \d test03
Table "public.test03"
Column | Type | Modifiers
--------+------------------------+-----------
id | integer | not null
col1 | character varying(100) |
Indexes:
"test03_pkey" PRIMARY KEY, btree (id)
Distributed by: (id)
没有主键,没有分布建,第一列作为分布键
testdb=# create table test04(id int, col1 varchar(100));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE
testdb=# \d test04
Table "public.test04"
Column | Type | Modifiers
--------+------------------------+-----------
id | integer |
col1 | character varying(100) |
Distributed by: (id)
testdb=# create table test05 ( col1 varchar(100), id int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE
testdb=# \d test05
Table "public.test05"
Column | Type | Modifiers
--------+------------------------+-----------
col1 | character varying(100) |
id | integer |
Distributed by: (col1)
有主键,不能按照随机分配
testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed randomly;
ERROR: PRIMARY KEY and DISTRIBUTED RANDOMLY are incompatible
testdb=# create table test06 ( col1 varchar(100), id int primary key) distributed randomly;
ERROR: PRIMARY KEY and DISTRIBUTED RANDOMLY are incompatible
有主键,则分布键只能为主键,若不同则会报错。
testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed by (col1);
ERROR: PRIMARY KEY and DISTRIBUTED BY definitions are incompatible
HINT: When there is both a PRIMARY KEY and a DISTRIBUTED BY clause, the DISTRIBUTED BY clause must be a subset of the PRIMARY KEY.
testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed by (id);
CREATE TABLE
testdb=# \d test06
Table "public.test06"
Column | Type | Modifiers
--------+------------------------+-----------
id | integer | not null
col1 | character varying(100) |
Indexes:
"test06_pkey" PRIMARY KEY, btree (id)
Distributed by: (id)
查看表的分布键方式
-- 如果为 RANDOMLY分布,则 distkey为空
testdb=# select c.relname, policytype, numsegments, distkey
from pg_class c, pg_catalog.gp_distribution_policy gpp
where c.oid = gpp.localoid;
relname | policytype | numsegments | distkey
---------+------------+-------------+---------
test01 | p | 12 |
test03 | p | 12 | 1
test04 | p | 12 | 1
test05 | p | 12 | 1
test06 | p | 12 | 1
test02 | p | 12 | 1
(6 rows)
查看表数据分布
select gp_segment_id,count(*) from table_name group by gp_segment_id;