greenplum - 创建表并查看表数据分布情况

随机分布

testdb=# create table test01 (id int, col1 varchar(100)) distributed RANDOMLY;
CREATE TABLE
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# select gp_segment_id,count(*) from test01 group by gp_segment_id;
 gp_segment_id | count
---------------+-------
             1 |     1
             6 |     4
             3 |     1
             8 |     1
             0 |     2
             5 |     1
(6 rows)

根据键值分布

testdb=# create table test02 (id int, col1 varchar(100)) distributed by (id);
CREATE TABLE
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=# select gp_segment_id,count(*) from test02 group by gp_segment_id;
 gp_segment_id | count
---------------+-------
             4 |    11
(1 row)

如果有主键,就会根据主键分布

testdb=# create table test03(id int primary key, col1 varchar(100));
CREATE TABLE
testdb=# \d test03
            Table "public.test03"
 Column |          Type          | Modifiers
--------+------------------------+-----------
 id     | integer                | not null
 col1   | character varying(100) |
Indexes:
    "test03_pkey" PRIMARY KEY, btree (id)
Distributed by: (id)

没有主键,没有分布建,第一列作为分布键

testdb=# create table test04(id int,  col1 varchar(100));
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE
testdb=# \d test04
            Table "public.test04"
 Column |          Type          | Modifiers
--------+------------------------+-----------
 id     | integer                |
 col1   | character varying(100) |
Distributed by: (id)

testdb=# create table test05 ( col1 varchar(100), id int);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE
testdb=# \d test05
            Table "public.test05"
 Column |          Type          | Modifiers
--------+------------------------+-----------
 col1   | character varying(100) |
 id     | integer                |
Distributed by: (col1)

有主键,不能按照随机分配

testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed randomly;
ERROR:  PRIMARY KEY and DISTRIBUTED RANDOMLY are incompatible
testdb=# create table test06 ( col1 varchar(100), id int primary key) distributed randomly;
ERROR:  PRIMARY KEY and DISTRIBUTED RANDOMLY are incompatible

有主键,则分布键只能为主键,若不同则会报错。

testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed by (col1);
ERROR:  PRIMARY KEY and DISTRIBUTED BY definitions are incompatible
HINT:  When there is both a PRIMARY KEY and a DISTRIBUTED BY clause, the DISTRIBUTED BY clause must be a subset of the PRIMARY KEY.
testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed by (id);
CREATE TABLE
testdb=# \d test06
            Table "public.test06"
 Column |          Type          | Modifiers
--------+------------------------+-----------
 id     | integer                | not null
 col1   | character varying(100) |
Indexes:
    "test06_pkey" PRIMARY KEY, btree (id)
Distributed by: (id)

查看表的分布键方式

-- 如果为 RANDOMLY分布,则 distkey为空
testdb=# select c.relname, policytype, numsegments, distkey 
from pg_class c, pg_catalog.gp_distribution_policy gpp 
where c.oid = gpp.localoid;
 relname | policytype | numsegments | distkey
---------+------------+-------------+---------
 test01  | p          |          12 |
 test03  | p          |          12 | 1
 test04  | p          |          12 | 1
 test05  | p          |          12 | 1
 test06  | p          |          12 | 1
 test02  | p          |          12 | 1
(6 rows)

查看表数据分布

select gp_segment_id,count(*) from table_name group by gp_segment_id;
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值