PostgreSQL分区表案例实战
1、什么是分区表
PostgreSQL10版本一个重量级的新特性是支持内置分区表,在分区表方面前进了一大步,目前支持范围分区和列表分区。继承加触发器方式实现的分区表称为传统分区表,10版本提供的分区表称为内置分区表。
分区表主要有以下优势:
-
当查询或更新一个分区上的大部分数据时,对分区进行索引扫描代价很大,然而,在分区上使用顺序扫描能提升性能。
-
当需要删除一个分区数据时,通过 DROP TABLE 删除一个分区,远比 DELETE 删除数据高效,特别适用于日志数据场景。
-
由于一个表只能存储在一个表空间上,使用分区表后,可以将分区放到不同的表空间上,例如可以将系统很少访问的分区放到廉价的存储设备上,也可以将系统常访问的分区存储在高速存储上。
分区表的优势主要体现在降低大表管理成本和某些场景的性能提升,相比普通表性能有何差异?本章将对传统分区表、内置分区表做性能测试。
- 减小单表大小,提高查询效率。
- 方便对表的维护,如创建索引耗时更短,通过清理历史分区(truncate 历史分区)释放磁盘空间(PostgreSQL delete不释放空间)。
- 分区子表更小,索引层级更低,减小扫描时间。
- 内部维护任务,autovacuum/analyze最近分区子表而不是整个大表,可以降低IO消耗,从而减少业务库的IO压力,避免表膨胀。
2、传统分区表
传统分区表是通过继承和触发器方式实现的,其实现过程步骤多,非常复杂,需要定义父表、定义子表、定义子表约束、创建子表索引、创建分区插入、删除、修改函数和触发器等,可以说是在普通表基础上手工实现的分区表。在介绍传统分区表之前先介绍继承,继承是传统分区表的重要组成部分。
2.1.1 创建分区表
# 创建父表
mydb=# create table log_ins(
mydb(# id serial,
mydb(# user_id int4,
mydb(# create_time timestamp(0) without time zone);
CREATE TABLE
# 创建子表
mydb=# create table log_ins_history(check (create_time < '2017-01-01')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201701(check (create_time >= '2017-01-01' and create_time < '2017-2-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201701(check (create_time >= '2017-01-01' and create_time < '2017-3-1')) inheritsmydb=# create table log_ins201702(check (create_time >= '2017-02-01' and create_time < '2017-3-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201703(check (create_time >= '2017-03-01' and create_time < '2017-4-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201704(check (create_time >= '2017-04-01' and create_time < '2017-5-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201705(check (create_time >= '2017-05-01' and create_time < '2017-6-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201706(check (create_time >= '2017-06-01' and create_time < '2017-7-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201707(check (create_time >= '2017-07-01' and create_time < '2017-8-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201708(check (create_time >= '2017-08-01' and create_time < '2017-9-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201709(check (create_time >= '2017-09-01' and create_time < '2017-10-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201710(check (create_time >= '2017-10-01' and create_time < '2017-11-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201711(check (create_time >= '2017-11-01' and create_time < '2017-12-1')) inherits(log_ins);
CREATE TABLE
mydb=# create table log_ins201712(check (create_time >= '2017-12-01' and create_time < '2018-01-1')) inherits(log_ins);
CREATE TABLE
# 子表创建索引
mydb=# create index inx_his_ctime on log_ins_history using btree(create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201701_ctime on log_ins201701 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201702_ctime on log_ins201702 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201703_ctime on log_ins201703 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201704_ctime on log_ins201704 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201705_ctime on log_ins201705 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201706_ctime on log_ins201706 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201707_ctime on log_ins201707 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201708_ctime on log_ins201708 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201709_ctime on log_ins201709 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201710_ctime on log_ins201710 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201711_ctime on log_ins201711 using btree (create_time);
CREATE INDEX
mydb=# create index inx_log_ins_201712_ctime on log_ins201712 using btree (create_time);
CREATE INDEX
# 创建触发函数
create or replace function log_ins_insert_trigger()
returns trigger
language plpgsql
as $function$
begin
if (new.create_time < '2017-01-01' ) then
insert into log_ins_history values (new.*);
elsif (new.create_time >= '2017-01-01' and new.create_time < '2017-02-01') then
insert into log_ins201701 values (new.*);
elsif (new.create_time >= '2017-02-01' and new.create_time < '2017-03-01') then
insert into log_ins201702 values (new.*);
elsif (new.create_time >= '2017-03-01' and new.create_time < '2017-04-01') then
insert into log_ins201703 values (new.*);
elsif (new.create_time >= '2017-04-01' and new.create_time < '2017-05-01') then
insert into log_ins201704 values (new.*);
elsif (new.create_time >= '2017-05-01' and new.create_time < '2017-06-01') then
insert into log_ins201705 values (new.*);
elsif (new.create_time >= '2017-06-01' and new.create_time < '2017-07-01') then
insert into log_ins201706 values (new.*);
elsif (new.create_time >= '2017-07-01' and new.create_time < '2017-08-01') then
insert into log_ins201707 values (new.*);
elsif (new.create_time >= '2017-08-01' and new.create_time < '2017-09-01') then
insert into log_ins201708 values (new.*);
elsif (new.create_time >= '2017-09-01' and new.create_time < '2017-10-01') then
insert into log_ins201709 values (new.*);
elsif (new.create_time >= '2017-10-01' and new.create_time < '2017-11-01') then
insert into log_ins201710 values (new.*);
elsif (new.create_time >= '2017-11-01' and new.create_time < '2017-12-01') then
insert into log_ins201711 values (new.*);
elsif (new.create_time >= '2017-12-01' and new.create_time < '2018-01-01') then
insert into log_ins201712 values (new.*);
else
raise exception 'create_time out of range. fix then log_ins_insert_trigger() function!';
end if;
return null;
end;
$function$;
# 创建触发器
mydb=# create trigger insert_log_ins_trigger before insert on log_ins for each row
mydb-# execute procedure log_ins_insert_trigger();
CREATE TRIGGER
触发器创建完成后,往父表log_ins插入数据时,会执行触发器并触发函数log_ins_insert_trigger()将表数据插人到相应分区中。DELETE、UPDATE触发器和函数创建过程和INSERT方式类似,这里不再列出,这步完成之后,传统分区表的创建步骤已全部完成。
注意:父表和子表都可以定义主键约束,但会带来一个问题,由于父表和子表的主键约束是分别创建的,那么可能在父表和子表中存在重复的主键数据,这对整个分区表说来做不到主键唯一,举个简单的例子,假如在父表和所有子表的user_id字段上创建主键,父表与子表及子表与子表之间可能存在相同的user_id,这点需要注意。
2.1.2 使用分区表
i 父表插入数据
mydb=# insert into log_ins(user_id, create_time)
select round(100000000 * random()), generate_series('2016-12-01'::date, '2017-12-01'::date, '1 minute');
INSERT 0 0
ii 查看数据
# 查看插入的数据
mydb=# select * from log_ins limit 3;
id | user_id | create_time
----+----------+---------------------
1 | 44271258 | 2016-12-01 00:00:00
2 | 47537901 | 2016-12-01 00:01:00
3 | 37397474 | 2016-12-01 00:02:00
(3 rows)
# 查看父表的数据
mydb=# select count(*) from only log_ins;
count
-------
0
(1 row)
mydb=# select count(*) from log_ins;
count
---------
1051202
(1 row)
mydb=# select min(create_time), max(create_time) from log_ins201701;
min | max
---------------------+---------------------
2017-01-01 00:00:00 | 2017-01-31 23:59:00
(1 row)
# 查看表大小
mydb=# \dt+ log_ins*;
List of relations
Schema | Name | Type | Owner | Size | Description
--------+-----------------+-------+----------+------------+-------------
public | log_ins | table | postgres | 0 bytes |
public | log_ins201701 | table | postgres | 3888 kB |
public | log_ins201702 | table | postgres | 3512 kB |
public | log_ins201703 | table | postgres | 3888 kB |
public | log_ins201704 | table | postgres | 3768 kB |
public | log_ins201705 | table | postgres | 3888 kB |
public | log_ins201706 | table | postgres | 3768 kB |
public | log_ins201707 | table | postgres | 3888 kB |
public | log_ins201708 | table | postgres | 3888 kB |
public | log_ins201709 | table | postgres | 3768 kB |
public | log_ins201710 | table | postgres | 3888 kB |
public | log_ins201711 | table | postgres | 3768 kB |
public | log_ins201712 | table | postgres | 8192 bytes |
public | log_ins_history | table | postgres | 3888 kB |
(14 rows)
由此可见,数据不在父表中,不在子表中。用户插入的数据都被插入到了子表中。
iii 分区表的查询执行计划
# 查询父表执行计划
mydb=# explain analyze select * from log_ins where create_time > '2017-01-01' and create_time > '2017-01-02';
QUERY PLAN
------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------
Append (cost=0.00..24462.87 rows=959668 width=16) (actual time=0.088..116.770 rows=959040 loops=1)
-> Seq Scan on log_ins (cost=0.00..0.00 rows=1 width=16) (actual time=0.002..0.002 rows=0 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201701 (cost=0.00..1822.20 rows=86410 width=16) (actual time=0.085..7.569 rows=86
398 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
Rows Removed by Filter: 2882
-> Seq Scan on log_ins201702 (cost=0.00..1645.60 rows=80640 width=16) (actual time=0.011..6.852 rows=80
640 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201703 (cost=0.00..1822.20 rows=89280 width=16) (actual time=0.010..7.327 rows=89
280 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201704 (cost=0.00..1764.00 rows=86400 width=16) (actual time=0.012..7.111 rows=86
400 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201705 (cost=0.00..1822.20 rows=89280 width=16) (actual time=0.009..7.070 rows=89
280 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201706 (cost=0.00..1764.00 rows=86400 width=16) (actual time=0.008..7.378 rows=86
400 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201707 (cost=0.00..1822.20 rows=89280 width=16) (actual time=0.009..7.508 rows=89
280 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201708 (cost=0.00..1822.20 rows=89280 width=16) (actual time=0.012..7.116 rows=89
280 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201709 (cost=0.00..1764.00 rows=86400 width=16) (actual time=0.010..6.935 rows=86
400 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201710 (cost=0.00..1822.20 rows=89280 width=16) (actual time=0.009..7.027 rows=89
280 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Seq Scan on log_ins201711 (cost=0.00..1764.00 rows=86400 width=16) (actual time=0.008..6.746 rows=86
400 loops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '201
7-01-02 00:00:00'::timestamp without time zone))
-> Bitmap Heap Scan on log_ins201712 (cost=10.48..29.73 rows=617 width=16) (actual time=0.009..0.009 ro
ws=2 loops=1)
Recheck Cond: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time
> '2017-01-02 00:00:00'::timestamp without time zone))
Heap Blocks: exact=1
-> Bitmap Index Scan on inx_log_ins_201712_ctime (cost=0.00..10.32 rows=617 width=0) (actual time
=0.006..0.006 rows=2 loops=1)
Index Cond: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_t
ime > '2017-01-02 00:00:00'::timestamp without time zone))
Planning Time: 0.578 ms
Execution Time: 138.230 ms
(33 rows)
# 查询子表执行计划
mydb=# explain analyze select * from log_ins201701 where create_time > '2017-01-01' and create_time > '2017-01-02';
QUERY PLAN
------------------------------------------------------------------------------------------------------------
-------------------------------------------
Seq Scan on log_ins201701 (cost=0.00..1822.20 rows=86410 width=16) (actual time=0.087..7.220 rows=86398 lo
ops=1)
Filter: ((create_time > '2017-01-01 00:00:00'::timestamp without time zone) AND (create_time > '2017-01-0
2 00:00:00'::timestamp without time zone))
Rows Removed by Filter: 2882
Planning Time: 0.064 ms
Execution Time: 9.110 ms
(5 rows)
从以上执行计划看出在分区 log_ins_201701上进行了索引扫描,执行时间为138.230毫秒,接着查看直接查询子表log_ins_201701的执行计划,只需要9.110毫秒,性能上有一定提升如果并发量上去的话,这个差异将更明显,因此在实际生产过程中,对于传统分区表分区方式,不建议应用访问父表,而是直接访问子表。
那么,应用如何定位到访问哪张子表呢?可以根据预先的分区约束定义,本节这个例子log_ins是根据时间范围分区,那么应用可以根据时间来判断查询哪张子表,当然,以上是根据分区表分区键查询的场景,如果根据非分区键查询则会扫描分区表的所有分区。
iiii 查看分区表信息
mydb=# \d log_ins
Table "public.log_ins"
Column | Type | Collation | Nullable | Default
-------------+--------------------------------+-----------+----------+----------------------------
---------
id | integer | | not null | nextval('log_ins_id_seq'::r
egclass)
user_id | integer | | |
create_time | timestamp(0) without time zone | | |
Triggers:
insert_log_ins_trigger BEFORE INSERT ON log_ins FOR EACH ROW EXECUTE FUNCTION log_ins_insert_trigger()
Number of child tables: 13 (Use \d+ to list them.)
以上信息显示了表log_ins有14个分区,并且创建了触发器,触发器函数为logins_insert_trigger(),如果想列出分区名称可通过\d+ log_ins元命令列出。
mydb=# \d+ log_ins
Table "public.log_ins"
Column | Type | Collation | Nullable | Default |
Storage | Stats target | Description
-------------+--------------------------------+-----------+----------+-------------------------------------+
---------+--------------+-------------
id | integer | | not null | nextval('log_ins_id_seq'::regclass) |
plain | |
user_id | integer | | | |
plain | |
create_time | timestamp(0) without time zone | | | |
plain | |
Triggers:
insert_log_ins_trigger BEFORE INSERT ON log_ins FOR EACH ROW EXECUTE FUNCTION log_ins_insert_trigger()
Child tables: log_ins201701,
log_ins201702,
log_ins201703,
log_ins201704,
log_ins201705,
log_ins201706,
log_ins201707,
log_ins201708,
log_ins201709,
log_ins201710,
log_ins201711,
log_ins201712,
log_ins_history
Access method: heap
3、内置分区表
PostgreSQL10一个重量级新特性是支持内置分区表,用户不需要预先在父表上定义INSERT、DELETE、UPDATE触发器,对父表的DML 操作会自动路由到相应分区,相比传统分区表大幅度降低了维护成本。
目前仅支持范围分区和列表分区。
3.1 创建分区表
mydb=# create table log_par(
mydb(# id serial,
mydb(# user_id int4,
mydb(# create_time timestamp(0) without time zone
mydb(# )partition by range (create_time);
CREATE TABLE
# 表log_par指定了分区策略为范围分区,分区键为create_time字段。
# 创建分区,并设置分区的分区键取值范围,如下所示:
mydb=# create table log_par_hist partition of log_par for values from (unbounded) to ('2017-01-01');
mydb=# create table log_par_201701 partition of log_par for values from ('2017-01-01') to ('2017-02-01');
CREATE TABLE
mydb=# create table log_par_201702 partition of log_par for values from ('2017-02-01') to ('2017-03-01');
CREATE TABLE
mydb=# create table log_par_201703 partition of log_par for values from ('2017-03-01') to ('2017-04-01');
CREATE TABLE
mydb=# create table log_par_201704 partition of log_par for values from ('2017-04-01') to ('2017-05-01');
CREATE TABLE
mydb=# create table log_par_201705 partition of log_par for values from ('2017-05-01') to ('2017-06-01');
CREATE TABLE
mydb=# create table log_par_201706 partition of log_par for values from ('2017-06-01') to ('2017-07-01');
CREATE TABLE
mydb=# create table log_par_201707 partition of log_par for values from ('2017-07-01') to ('2017-08-01');
CREATE TABLE
mydb=# create table log_par_201708 partition of log_par for values from ('2017-08-01') to ('2017-09-01');
CREATE TABLE
mydb=# create table log_par_201709 partition of log_par for values from ('2017-09-01') to ('2017-10-01');
CREATE TABLE
mydb=# create table log_par_201710 partition of log_par for values from ('2017-10-01') to ('2017-11-01');
CREATE TABLE
mydb=# create table log_par_201711 partition of log_par for values from ('2017-11-01') to ('2017-12-01');
CREATE TABLE
mydb=# create table log_par_201712 partition of log_par for values from ('2017-12-01') to ('2018-01-01');
CREATE TABLE
# 注意分区的分区键范围不要有重叠,定义分区键范围实质上给分区创建了约束。
# 给所有分区的分区键创建索引,如下所示:
mydb=# create index idx_log_par_his_ctime on log_par_his USING btree(create_time);
mydb=# create index idx_log_par_201701_ctime on log_par_201701 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201702_ctime on log_par_201702 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201703_ctime on log_par_201703 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201704_ctime on log_par_201704 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201705_ctime on log_par_201705 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201706_ctime on log_par_201706 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201707_ctime on log_par_201707 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201708_ctime on log_par_201708 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201709_ctime on log_par_201709 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201710_ctime on log_par_201710 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201711_ctime on log_par_201711 USING btree(create_time);
CREATE INDEX
mydb=# create index idx_log_par_201712_ctime on log_par_201712 USING btree(create_time);
CREATE INDEX
3.2 使用分区表
# 分区表插入数据
mydb=# insert into log_par(user_id, create_time)
mydb-# select round(10000*random()),generate_series('2016-12-01'::date,'2017-12-01'::date,'1 minute')
后面内容同传统分区一致,此处不在编写
内置分区表原理实际上和传统分区表一样,也是使用继承方式,分区可称为子表
总结:
本章介绍了传统分区表和内置分区表的部署、分区维护和性能测试,传统分区表通过继承和触发器实现,创建过程非常复杂,维护成本很高,内置分区表是PostgreSQL10新特性,用户不再需要创建触发器和函数,省去了大量维护成本,性能方面两者几乎无差异。分区表和普通表间的性能差异本章通过两个查询场景进行了性能对比,一个是基于非分区键查询的场景,另一个是基于分区键查询的场景,从测试结果来看,基于非分区键的查询场景分区表性能比普通表低很多,基于分区键查询分区表父表比普通表性能略低,基于分区键查询分区表子表比普通表性能略有提升,读者在生产系统中使用分区表时需考虑分区后的SQL性能变化。