pg表分区
官网链接:http://www.postgres.cn/docs/11/ddl-partitioning.html
一、声明式划分
PostgreSQL提供了表分区特性。通常支持范围RANGE划分,比如按时间。列表划分、哈希分区。
特点:
a.无法把一个常规表转换成分区表,反之亦然。不过,可以把一个包含数据的常规表或者分区表作为分区加入到另一个分区表,或者从分区表中移走一个分区并且把它变成一个独立的表
b.分区本身也可能被定义为分区表,这种用法被称为子分区。分区可以有自己的与其他分区不同的索引、约束以及默认值
c.在分区表的键列上创建一个索引,还有其他需要的索引(键索引并不是必需的,但是大部分场景中它都能很有帮助)。这会自动在每个分区上创建一个索引,并且后来创建或者附着的任何分区也将会包含索引
d.由于分区表并不直接拥有任何数据,尝试在分区表上使用TRUNCATE
ONLY
将总是返回错误
e .如果NOT NULL
约束在父表中存在,那么就不能删除分区的列上的对应的NOT NULL
约束。分区表的CHECK约束和NOT NULL约束总是会被其所有的分区所继承。不允许在分区表上创建标记为NO INHERIT的CHECK约束。只要分区表中不存在分区,则支持使用ONLY仅在分区表上增加或者删除约束。一旦分区存在,那样做就会导致错误,因为当分区存在时是不支持仅在分区表上增加或删除约束的。不过,分区表本身上的约束可以被增加(如果它们不出现在父表中)和删除。
示例:
1.通过指定PARTITION BY
子句把measurement
表创建为分区表
CREATE TABLE public.measurement (
city_id int4 NOT NULL,
logdate date NOT NULL,
peaktemp int4 NULL,
unitsales int4 NULL
)
PARTITION BY RANGE (logdate);
2.创建分区子表(约束在子表创建,无法在主表创建跨越所有分区的约束)
CREATE TABLE public.measurement_y2006m03 PARTITION OF public.measurement (
CONSTRAINT measurement_y2006m03_pk PRIMARY KEY (city_id)
)FOR VALUES FROM ('2006-03-01') TO ('2006-04-01');
3.索引创建(主表创建,分区也生效)
CREATE INDEX measurement_logdate_idx ON ONLY public.measurement USING btree (logdate);
4.分区维护
--删除掉不再需要的分区
DROP TABLE measurement_y2006m03;
--分区从分区表中移除,但是保留它作为一个独立的表
ALTER TABLE measurement DETACH PARTITION measurement_y2006m02;
--创建默认的表分区(版本11以及以上支持)
create table measurement_default partition of measurement default;
5.存储函数(实战)
创建分区任务
/*
-- function: Create partitions for table measurement.
-- parameter:
-- p_partition_unit: Partition unit. month, day;
-- p_partiton_cnt: Count of partitions to be created when this function is called. The default value is 7.
-- p_start_utc_time: One UTC timestamp value that is in the range of the first partition's partition-key, in milliseconds. The default value is now.
-- return:
-- success: 0
-- 模板复用方法:
-- 1. measurement 替换为新的表名
-- 2. (city_id) 替换为新的表的 (主键字段)
*/
create or replace function func_create_partition_for_measurement(p_partition_unit text default 'month', p_partiton_cnt int default 6, p_start_time timestamp default now()) returns int as
$body$
declare
v_child_tbname text;
v_start_time timestamp with time zone;
v_end_time timestamp with time zone;
v_has_default int default 0;
v_parent_tbname text;
v_partition_defaultname text;
v_partition_defaultname_tmp text;
partition_interval interval;
time_format text;
begin
v_parent_tbname := 'measurement';
v_partition_defaultname := v_parent_tbname || '_default';
v_partition_defaultname_tmp := v_partition_defaultname || '_tmp';
--pg_inherits为系统表用来记录表间的继承关系 inhparent为主表oid inhrelid为分区表oid regclass是oid的别名 oid是一个库中对象的唯一标识
perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_partition_defaultname;
if found then
v_has_default := 1;
--如果默认分区*_default在系统表中已经存在,解除该表的分区(但是数据未删除)
execute 'alter table ' || v_parent_tbname || ' detach partition ' || v_partition_defaultname ;
end if;
partition_interval := (case p_partition_unit
when 'month' then interval '1 month'
when 'day' then interval '1 day'
end);
time_format := (case p_partition_unit
when 'month' then 'YYYYMM'
when 'day' then 'YYYYMMDD'
end);
if p_partition_unit in ('month', 'day') then
--按照月分割开始时间的时间戳
v_start_time := date_trunc(p_partition_unit, p_start_time);
for v_i in 0..(p_partiton_cnt - 1) loop
v_child_tbname := v_parent_tbname || '_' || to_char(v_start_time, time_format);
perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_child_tbname;
if not found then
v_end_time := v_start_time + partition_interval;
execute 'create table ' || v_child_tbname || ' partition of ' || v_parent_tbname || ' for values from (''' || v_start_time || ''') to (''' || v_end_time || ''')';
execute 'alter table ' || v_child_tbname || ' add constraint pk_' || v_child_tbname || ' primary key (city_id)';
end if;
v_start_time := v_start_time + partition_interval;
end loop;
end if;
if v_has_default = 1 then
--如果默认分区表存在,把default分区表的数据重新分区
execute 'drop table if exists ' || v_partition_defaultname_tmp;
execute 'alter table ' || v_partition_defaultname || ' drop constraint if exists pk_' || v_partition_defaultname;
execute 'alter table ' || v_partition_defaultname || ' rename to ' || v_partition_defaultname_tmp ;
execute 'create table ' || v_partition_defaultname || ' partition of ' || v_parent_tbname || ' default';
execute 'alter table ' || v_partition_defaultname || ' add constraint pk_' || v_partition_defaultname || ' primary key (city_id)';
execute 'insert into ' || v_parent_tbname || ' select * from ' || v_partition_defaultname_tmp;
execute 'drop table if exists ' || v_partition_defaultname_tmp;
else
execute 'create table ' || v_partition_defaultname || ' partition of ' || v_parent_tbname || ' default';
execute 'alter table ' || v_partition_defaultname || ' add constraint pk_' || v_partition_defaultname || ' primary key (city_id)';
end if;
return 0;
end;
$body$ language plpgsql;
select func_create_partition_for_measurement('month',2,'2023-02-23')
保存最近?天的数据
/*
-- function: Clean old data for measurement
-- parameter:
-- p_partition_unit: Partition unit. month, day;
-- p_keep_days: Duration of data retention, in days. The default value is 90.
-- return:
-- success: 0
-- 模板复用方法:
-- 1. measurement 替换为新的表名
-- 2. logdate 替换为 分区字段。
*/
create or replace
function func_clean_partition_for_measurement(p_partition_unit varchar default 'month',
p_keep_days int default 90) returns int as $body$
declare v_parent_tbname text;
v_partition_defaultname text;
v_sql text;
v_clean_time timestamp with time zone ;
v_recd record;
v_partiton_colname text;
time_format text;
begin
v_sql = e'select now() - interval \'' || p_keep_days || e' day\'';
execute v_sql
into
v_clean_time;
v_parent_tbname := 'measurement';
v_partiton_colname := 'logdate';
v_partition_defaultname := v_parent_tbname || '_default';
time_format :=
(case
p_partition_unit
when 'month' then 'YYYYMM'
when 'day' then 'YYYYMMDD'
end);
perform *
from
pg_inherits
where
cast(cast(inhparent as regclass) as text) = v_parent_tbname
and cast(cast(inhrelid as regclass) as text) = v_partition_defaultname;
if found then execute 'delete from ' || v_partition_defaultname || ' where ' || v_partiton_colname || ' < ' || 'cast(''' || v_clean_time
|| ''' as timestamp with time zone)';
end if;
if p_partition_unit in ('month',
'day') then for v_recd in
select
cast(cast(inhparent as regclass) as text) as parent_tbname,
cast(cast(inhrelid as regclass) as text) as child_tbname
from
pg_inherits
where
cast(cast(inhparent as regclass) as text) = v_parent_tbname
and cast(cast(inhrelid as regclass) as text) < v_parent_tbname || '_' || to_char(v_clean_time, time_format)
order by
child_tbname asc loop execute 'drop table if exists ' || v_recd.child_tbname;
end loop;
end if;
return 0;
end;
$body$ language plpgsql;
select func_clean_partition_for_measurement('month',90);
模拟数据
--模拟数据
--创建自增序列
create sequence public.id_seq increment by 1 minvalue 1 maxvalue 9223372036854775807 cache 1 no cycle;
select
nextval('public.id_seq');
with dedate as (
select
(
select
array_agg(i::date)
from
generate_series('2008-01-01'::date, '2008-03-01'::date, '+1 day'::interval) as t(i) ) )
insert
into
product
select
nextval('public.id_seq') as id,
coalesce(( select * from dedate )[(random()* 59)] , '2008-01-01')as logdate,
floor(random()* 20)+ 1 as peaktemp,
floor(random()* 10000)+ 1 as unitsales
from
generate_series(1, 1000000, 1);
二、使用继承实现
虽然内建的声明式分区适合于大部分常见的用例,但还是有一些场景需要更加灵活的方法。分区可以使用表继承来实现,这能够带来一些声明式分区不支持的特性,例如:
- 对声明式分区来说,分区必须具有和分区表正好相同的列集合,而在表继承中,子表可以有父表中没有出现过的额外列。
- 表继承允许多继承。
- 声明式分区仅支持范围、列表以及哈希分区,而表继承允许数据按照用户的选择来划分(不过注意,如果约束排除不能有效地剪枝子表,查询性能可能会很差)。
- 在使用声明式分区时,一些操作比使用表继承时要求更长的持锁时间。例如,向分区表中增加分区或者从分区表移除分区要求在父表上取得一个
ACCESS EXCLUSIVE
锁,而在常规继承的情况下一个SHARE UPDATE EXCLUSIVE
锁就足够了。
示例:
- 创建“主”表,所有的“子”表都将从它继承。这个表将不包含数据。不要在这个表上定义任何检查约束,除非想让它们应用到所有的子表上。同样,在这个表上定义索引或者唯一约束也没有意义。
- 创建数个“子”表,每一个都从主表继承。通常,这些表将不会在从主表继承的列集合之外增加任何列
- 为子表增加不重叠的表约束来定义每个分区允许的键值
- 对于每个子表,在键列上创建一个索引,以及任何想要的其他索引
- 创建触发器/代码逻辑实现
--创建主表
create table product ( id int not null,
logdate date not null,
peaktemp int,
unitsales int );
--创建子表1
create table public.product_y2008m01 ( constraint product_y2008m01_logdate_check check (((logdate >= '2008-01-01'::date)
and (logdate < '2008-02-01'::date))) ) inherits (public.product);
create index product_y2008m01_logdate_idx on
public.product_y2008m01
using btree (logdate);
--创建子表2
create table public.product_y2008m02 ( constraint product_y2008m02_logdate_check check (((logdate >= '2008-02-01'::date)
and (logdate < '2008-03-01'::date))) ) inherits (public.product);
create index product_y2008m02_logdate_idx on
public.product_y2008m02
using btree (logdate);
--创建触发器函数
create or replace
function product_insert_trigger() returns trigger as $$
begin
if ( NEW.logdate >= date '2008-01-01'
and NEW.logdate < date '2008-02-01' ) then
insert
into
product_y2008m01
values (NEW.*);
elsif ( NEW.logdate >= date '2008-02-01'
and NEW.logdate < date '2008-03-01' ) then
insert
into
product_y2008m02
values (NEW.*);
else raise exception 'Date out of range. Fix the product_insert_trigger() function!';
end if;
return null;
end;
$$ language plpgsql;
--创建触发器
create trigger insert_product_trigger before
insert
on
public.product for each row execute procedure product_insert_trigger();
分区维护
--要快速移除旧数据,只需要简单地去掉不再需要的子表:
DROP TABLE measurement_y2006m02;
--要从继承层次表中去掉子表,但还是把它当做一个表保留:
ALTER TABLE measurement_y2006m02 NO INHERIT measurement;
实战
创建存储函数每月执行建表语句,更新触发器函数
create or replace function func_create_partition_for_product(p_partition_method int default 1, p_partiton_cnt int default 6, p_start_time timestamp default now()) returns int as
$body$
declare
v_child_tbname text;
v_start_time timestamp with time zone;
v_end_time timestamp with time zone;
v_has_default int default 0;
v_parent_tbname text;
v_partition_defaultname text;
v_partition_defaultname_tmp text;
v_partition_unit text;
v_partition_interval interval;
v_time_format text;
begin
v_parent_tbname := 'product';
v_partition_defaultname := v_parent_tbname || '_default';
v_partition_defaultname_tmp := v_partition_defaultname || '_tmp';
perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_partition_defaultname;
if found then
v_has_default := 1;
execute 'alter table ' || v_partition_defaultname || ' NO INHERIT ' || v_parent_tbname;
end if;
v_partition_unit := (case p_partition_method
when 1 then 'month'
when 2 then 'day'
end);
v_partition_interval := (case p_partition_method
when 1 then interval '1 month'
when 2 then interval '1 day'
end);
v_time_format := (case p_partition_method
when 1 then 'YYYYMM'
when 2 then 'YYYYMMDD'
end);
if p_partition_method in (1,2) then
v_start_time := date_trunc(v_partition_unit, p_start_time);
for v_i in 0..(p_partiton_cnt - 1) loop
v_child_tbname := v_parent_tbname || '_' || to_char(v_start_time, v_time_format);
perform * from pg_inherits where cast(cast(inhparent as regclass) as text) = v_parent_tbname and cast(cast(inhrelid as regclass) as text) = v_child_tbname;
if not found then
v_end_time := v_start_time + v_partition_interval;
execute 'create table '||v_child_tbname||'() inherits ('||v_parent_tbname||')';
execute 'ALTER TABLE '||v_child_tbname||' ADD CONSTRAINT '||v_child_tbname||'_check CHECK ((logdate >= ''' || v_start_time || '''::date) and (logdate < ''' || v_end_time || '''::date))';
execute 'ALTER TABLE '||v_child_tbname||' ADD CONSTRAINT '||v_child_tbname||'_pk PRIMARY KEY (id)';
execute 'CREATE UNIQUE INDEX '||v_child_tbname||'_logdate_idx ON '||v_child_tbname||' USING btree (id)';
end if;
v_start_time := v_start_time + v_partition_interval;
end loop;
end if;
if v_has_default = 1 then
execute 'drop table if exists ' || v_partition_defaultname_tmp;
execute 'alter table ' || v_partition_defaultname || ' drop constraint if exists pk_' || v_partition_defaultname;
execute 'alter table ' || v_partition_defaultname || ' rename to ' || v_partition_defaultname_tmp ;
execute 'create table '||v_partition_defaultname||'() inherits ('||v_parent_tbname||')' ;
execute 'alter table ' || v_partition_defaultname || ' add constraint pk_' || v_partition_defaultname || ' primary key (id)';
execute 'insert into ' || v_parent_tbname || ' select * from ' || v_partition_defaultname_tmp;
execute 'drop table if exists ' || v_partition_defaultname_tmp;
else
execute 'create table '||v_partition_defaultname||'() inherits ('||v_parent_tbname||')' ;
execute 'alter table ' || v_partition_defaultname || ' add constraint pk_' || v_partition_defaultname || ' primary key (id)';
end if;
return 0;
end;
$body$ language plpgsql;
--创建触发器函数
create or replace
function product_insert_trigger() returns trigger as $$
declare v_curtime timestamp with time zone;
begin
v_curtime := date_trunc('month', now());
if ( NEW.logdate >= v_curtime
and NEW.logdate < v_curtime + '1 month' ) then execute 'insert into product_' || to_char(v_curtime,'YYYYMM') ||' values ( $1.*)' using NEW;
elseif ( NEW.logdate >= v_curtime + '1 month'
and NEW.logdate < v_curtime + '2 month' ) then execute 'insert into product_' || to_char(v_curtime + '1 month', 'YYYYMM')|| ' values ($1.*)' using NEW;
elseif ( NEW.logdate >= v_curtime + '2 month'
and NEW.logdate < v_curtime + '3 month' ) then execute 'insert into product_' || to_char(v_curtime + '2 month', 'YYYYMM')|| ' values ($1.*)' using NEW;
else
--raise exception 'Date out of range. Fix the product_insert_trigger() function!';
insert
into
product_default
values (NEW.*);
end if;
return null;
end;
$$ language plpgsql;