PostgreSQL分区表实现
PG内置分区参考链接: http://blog.163.com/digoal@126/blog/static/163877040201651410300333/
PG分区表讨论和Patch链接:
https://www.postgresql.org/message-id/flat/55D3093C.5010800@lab.ntt.co.jp#55D3093C.5010800@lab.ntt.co.jp
https://wiki.postgresql.org/wiki/Table_partitioning
PostgreSQL在10的版本正式发布了分区, 在10之前的版本需要通过其他手段来实现,其中包括触发器(非常损耗性能的一种),我们现在简单的看一下PG是如何实现分区的.
方式1: Table Inherits + Triggers
方式2: Table Inherits + Rules
方式1的具体实现:
1) 创建父表和子表, 事实上所有数据各个子表(分区)继承到父表上的, 通过触发器, 将符合要求的数据插入到合适的分区中.
2) 创建相应的触发器:
2. 方式2具体实现参考PG官方手册. 5.数据结构定义章节-Partition Table
实验结果:
分区性能:
参数更改后:
PG内置分区参考链接: http://blog.163.com/digoal@126/blog/static/163877040201651410300333/
PG分区表讨论和Patch链接:
https://www.postgresql.org/message-id/flat/55D3093C.5010800@lab.ntt.co.jp#55D3093C.5010800@lab.ntt.co.jp
https://wiki.postgresql.org/wiki/Table_partitioning
PostgreSQL在10的版本正式发布了分区, 在10之前的版本需要通过其他手段来实现,其中包括触发器(非常损耗性能的一种),我们现在简单的看一下PG是如何实现分区的.
方式1: Table Inherits + Triggers
方式2: Table Inherits + Rules
方式1的具体实现:
1) 创建父表和子表, 事实上所有数据各个子表(分区)继承到父表上的, 通过触发器, 将符合要求的数据插入到合适的分区中.
mydb=# \i partitions.sql
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
CREATE TABLE
mydb=# \! cat partitions.sql
create table t (
id int not null ,
name varchar(10) not null ,
emp_date date not null
);
create table t_y2018m01
(check (emp_date >= date '2018-01-01'
and emp_date < date '2018-02-01'))
inherits (t);
create table t_y2018m02
(check (emp_date >= date '2018-02-01'
and emp_date < date '2018-03-01'))
inherits (t);
create table t_y2018m03
(check (emp_date >= date '2018-03-01'
and emp_date < date '2018-04-01'))
inherits (t);
create table t_y2018m04
(check (emp_date >= date '2018-04-01'
and emp_date < date '2018-05-01'))
inherits (t);
create table t_y2018m05
(check (emp_date >= date '2018-05-01'
and emp_date < date '2018-06-01'))
inherits (t);
create table t_y2018m06
(check (emp_date >= date '2018-06-01'
and emp_date < date '2018-07-01'))
inherits (t);
create table t_y2018m07
(check (emp_date >= date '2018-07-01'
and emp_date < date '2018-08-01'))
inherits (t);
create table t_y2018m08
(check (emp_date >= date '2018-08-01'
and emp_date < date '2018-09-01'))
inherits (t);
create table t_y2018m09
(check (emp_date >= date '2018-09-01'
and emp_date < date '2018-10-01'))
inherits (t);
create table t_y2018m10
(check (emp_date >= date '2018-10-01'
and emp_date < date '2018-11-01'))
inherits (t);
create table t_y2018m11
(check (emp_date >= date '2018-11-01'
and emp_date < date '2018-12-01'))
inherits (t);
create table t_y2018m12
(check (emp_date >= date '2018-12-01'
and emp_date < date '2019-01-01'))
inherits (t);
2) 创建相应的触发器:
create or replace function t_insert_trigger()
returns trigger as $$
begin
if ( new.emp_date >= date '2018-01-01' and
new.emp_date < date '2018-02-01' ) then
insert into t_y2018m01 values (new.*);
elsif ( new.emp_date >= date '2018-02-01' and
new.emp_date < date '2018-03-01' ) then
insert into t_y2018m02 values (new.*);
elsif ( new.emp_date >= date '2018-03-01' and
new.emp_date < date '2018-04-01' ) then
insert into t_y2018m03 values (new.*);
elsif ( new.emp_date >= date '2018-04-01' and
new.emp_date < date '2018-05-01' ) then
insert into t_y2018m04 values (new.*);
elsif ( new.emp_date >= date '2018-05-01' and
new.emp_date < date '2018-06-01' ) then
insert into t_y2018m05 values (new.*);
elsif ( new.emp_date >= date '2018-06-01' and
new.emp_date < date '2018-07-01' ) then
insert into t_y2018m06 values (new.*);
elsif ( new.emp_date >= date '2018-07-01' and
new.emp_date < date '2018-08-01' ) then
insert into t_y2018m07 values (new.*);
elsif ( new.emp_date >= date '2018-08-01' and
new.emp_date < date '2018-09-01' ) then
insert into t_y2018m08 values (new.*);
elsif ( new.emp_date >= date '2018-09-01' and
new.emp_date < date '2018-10-01' ) then
insert into t_y2018m09 values (new.*);
elsif ( new.emp_date >= date '2018-10-01' and
new.emp_date < date '2018-11-01' ) then
insert into t_y2018m10 values (new.*);
elsif ( new.emp_date >= date '2018-11-01' and
new.emp_date < date '2018-12-01' ) then
insert into t_y2018m11 values (new.*);
elsif ( new.emp_date >= date '2018-12-01' and
new.emp_date < date '2019-01-01' ) then
insert into t_y2018m12 values (new.*);
else
raise exception 'Date out of Range. Fix the t_insert_trigger() function!';
end if;
return null;
end;
$$
language plpgsql;
create trigger trigger_insert_t
before insert on t
for each row execute procedure t_insert_trigger();
2. 方式2具体实现参考PG官方手册. 5.数据结构定义章节-Partition Table
实验结果:
mydb=# insert into t values (1,'NiuJinlin','2018-01-02' :: DATE);
INSERT 0 0
mydb=# select * from t_y2018m01;
id | name | emp_date
----+-----------+------------
1 | NiuJinlin | 2018-01-02
(1 row)
mydb=# select * from t;
id | name | emp_date
----+-----------+------------
1 | NiuJinlin | 2018-01-02
(1 row)
mydb=# insert into t values (2,'Jiexy','2018-04-02' :: DATE);
INSERT 0 0
mydb=# select * from t;
id | name | emp_date
----+-----------+------------
1 | NiuJinlin | 2018-01-02
2 | Jiexy | 2018-04-02
(2 rows)
mydb=# select * from t_y2018m04;
id | name | emp_date
----+-------+------------
2 | Jiexy | 2018-04-02
(1 row)
mydb=# drop table t_y2018m01;
DROP TABLE
mydb=#
mydb=# select * from t;
id | name | emp_date
----+-------+------------
2 | Jiexy | 2018-04-02
(1 row)
mydb=# insert into t values(1,'NiuJinlin','2018-01-02' :: DATE);
ERROR: relation "t_y2018m01" does not exist
LINE 1: insert into t_y2018m01 values (new.*)
^
QUERY: insert into t_y2018m01 values (new.*)
CONTEXT: PL/pgSQL function t_insert_trigger() line 5 at SQL statement
mydb=# select * from t;
id | name | emp_date
----+-------+------------
2 | Jiexy | 2018-04-02
(1 row)
删除分区后的影响, 但是不影响其他分区.分区性能:
Constraint_Exclusion参数设置为"partition"后(PG9.2.4以后是默认的), 在查询表T的时候where部分的Check Constraint去对比,主动查找分区内容:
mydb=# explain select count(1) from t where emp_date < DATE '2018-05-01';
QUERY PLAN
-------------------------------------------------------------------------
Aggregate (cost=98.67..98.68 rows=1 width=8)
-> Append (cost=0.00..95.00 rows=1469 width=0)
-> Seq Scan on t (cost=0.00..0.00 rows=1 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m01 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m02 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m03 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m04 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
(12 rows)
参数更改后:
mydb=# alter database mydb set constraint_exclusion = 'off';
mydb=# show constraint_exclusion;
constraint_exclusion
----------------------
off
(1 row)
mydb=# explain select count(1) from t where emp_date < DATE '2018-05-01';
QUERY PLAN
-------------------------------------------------------------------------
Aggregate (cost=296.01..296.02 rows=1 width=8)
-> Append (cost=0.00..285.00 rows=4405 width=0)
-> Seq Scan on t (cost=0.00..0.00 rows=1 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m01 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m02 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m03 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m04 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m05 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m06 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m07 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m08 (cost=0.00..23.75 rows=367 width=0)
Filter: (emp_date < '2018-05-01'::date)
-> Seq Scan on t_y2018m09 (cost=0.00..23.75 rows=367 width=0)
....
我们发现PG会扫描所有的分区去查看数据.