GreenPlum中的分区表在数据量较大的情况下对于提升查询性能的帮助非常的,但是GreenPlum本身并没有提供分区表自动维护的工具,这里我们利用GreenPlum的PL/SQL自定义两个分区表自动维护的存储过程(也可以成为函数)。
创建存储过程之前首先要创建一个记录分区表详细信息的视图,这里可以参见上篇博文。由于业务中有多张表需要做分区,而且分区字段的类型并不一样,因此我们首先创建一张字典表,记录每张表的分区类型,如下:
CREATE TABLE op_tb_partition (
tb_name varchar(100) DEFAULT NULL,
timetype varchar(8) DEFAULT NULL
)DISTRIBUTED BY (tb_name);
select * from op_tb_partition;
tb_name | timetype
-----------------------------+----------
nl_app_action_error_trace | unixtime
nl_mob_app_anr_data | usertime
nl_mob_app_error_trace_test | datetime
(3 rows)
三张表的表结构以及分区分析如下:
create table nl_app_action_error_trace (
id bigserial not null,
timestamp int not null,
application_id int not null,
application_instance_id int not null,
action_id int not null,
action_type int not null,
key_action_id int not null,
error_type int not null,
error_message varchar(1025) default null,
error_count int not null,
request_uri varchar(1024) default null,
trace_data varchar(512) default null
)WITH (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5) DISTRIBUTED BY (id,timestamp,application_id)
PARTITION BY RANGE (timestamp)
(
partition p20160701 start (1467302400::int) end (1467388800::int),
partition p20160702 start (1467388800::int) end (1467475200::int)
);
create table nl_mob_app_error_trace_test (
id bigserial not null,
timestamp timestamp not null,
mobile_app_id int not null,
mobile_app_version_id int not null,
manufacturer_id int not null,
manufacturer_model_id int not null,
os_id int not null,
os_version_id int not null,
agent_version_id int not null default 0,
country_id int not null,
region_id int not null,
carrier_id int not null,
connect_type_id int not null,
key_url_id int not null,
ip bigint not null default 0,
host_id int not null,
host_ip bigint not null default 0,
error_type int not null,
error_code int not null,
error_count int not null,
request_url_id int not null,
request_url varchar(1024) DEFAULT NULL,
trace_data varchar(512) DEFAULT NULL
)WITH (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5) DISTRIBUTED BY (id,timestamp,mobile_app_id)
PARTITION BY RANGE (timestamp)
(
partition p20160701 start ('2016-07-01'::date) end ('2016-07-02'::date),
partition p20160702 start ('2016-07-02'::date) end ('2016-07-03'::date)
);
CREATE TABLE NL_MOB_APP_ANR_DATA (
id bigserial NOT NULL ,
timestamp int NOT NULL,
timestamp_anr int NOT NULL,
mobile_app_id int NOT NULL,
mobile_app_version_id int NOT NULL,
manufacturer_id int NOT NULL,
manufacturer_model_id int NOT NULL,
os_id int NOT NULL,
os_version_id int NOT NULL,
carrier_id int NOT NULL,
connect_type_id int NOT NULL,
agent_version_id int NOT NULL,
device_id bigint NOT NULL,
mobile_anr_id bigint NOT NULL DEFAULT 0,
anr_message varchar(1024) DEFAULT NULL,
stacktrace varchar(512) DEFAULT NULL ,
stacktrace_deobfuscated varchar(512) DEFAULT NULL,
anr_trace_file varchar(512) DEFAULT NULL,
anr_system_log varchar(512) DEFAULT NULL,
additional_info varchar(512) DEFAULT NULL
)WITH (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5) DISTRIBUTED BY (id,timestamp,mobile_app_id)
PARTITION BY RANGE (timestamp)
(
partition p20160701 start (1838400::int) end (1839840::int),
partition p20160702 start (1839840::int) end (1841280::int)
);
创建添加分区的存储过程,GreenPlum中单引号的转义符为两个单引号,详细代码如下:
create or replace function add_partition_day()
returns text as
$$
declare tb_options record;
declare curr_part varchar(8);
declare max_part varchar(20);
declare part_name varchar(9);
declare end_part varchar(8);
declare start_range int;
declare end_range int;
declare x int;
begin
for tb_options in select * from op_tb_partition group by tb_name,timetype loop
if tb_options.timetype = 'datetime' then
select replace(substring(current_date + interval '7 day' from 1 for 10),'-','') as date into max_part;
select substring(max(partition_name) from 2 for 8) into curr_part from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname =tb_options.tb_name;
select date(max_part) - date(curr_part) into x;
while x > 0 loop
select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into curr_part;
select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into end_part;
select 'p' || curr_part into part_name;
execute 'alter table ' || tb_options.tb_name || ' add partition ' || part_name || ' start (''' || curr_part || '''::date) end (''' || end_part || '''::date) with (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5)';
x = x-1;
end loop;
end_part = '';
end_range = 0;
elsif tb_options.timetype = 'usertime' then
select replace(substring(current_date + interval '7 day' from 1 for 10),'-','') as date into max_part;
select substring(max(partition_name) from 2 for 8) into curr_part from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname =tb_options.tb_name;
select date(max_part) - date(curr_part) into x;
while x > 0 loop
select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into curr_part;
select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into end_part;
select nl_to_timestamp(date(curr_part)) into start_range;
select nl_to_timestamp(date(end_part)) into end_range;
select 'p' || curr_part into part_name;
execute 'alter table ' || tb_options.tb_name || ' add partition ' || part_name || ' start (' || start_range || '::int) end (' || end_range || '::int) with (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5)';
-- alter table tb_options.tb_name add partition part_name start (start_range::int) end (end_range::int);
x = x-1;
end loop;
end_part = '';
end_range = 0;
elsif tb_options.timetype = 'unixtime' then
select replace(substring(current_date + interval '7 day' from 1 for 10),'-','') as date into max_part;
select substring(max(partition_name) from 2 for 8) into curr_part from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname =tb_options.tb_name;
select date(max_part) - date(curr_part) into x;
while x > 0 loop
select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into curr_part;
select replace(substring(date(curr_part) + interval '1 day' from 1 for 10),'-','') as date into end_part;
select unix_timestamp(date(curr_part)) into start_range;
select unix_timestamp(date(end_part)) into end_range;
select 'p' || curr_part into part_name;
execute 'alter table ' || tb_options.tb_name || ' add partition ' || part_name || ' start (' || start_range || '::int) end (' || end_range || '::int) with (appendonly=true, orientation=column,compresstype=zlib, compresslevel=5)';
-- alter table tb_options.tb_name add partition part_name start (start_range::int) end (end_range::int);
x = x-1;
end loop;
end_part = '';
end_range = 0;
end if;
end loop;
return 'ok';
end;
$$
LANGUAGE plpgsql;
创建删除分区的存储过程,这里的数据保存3个月,详细代码如下:
create or replace function drop_partition_day()
returns text as
$$
declare tb_options record;
declare const_part int default 90+7;
declare part_count int;
declare min_part_name varchar(255);
declare x int;
begin
for tb_options in select * from op_tb_partition group by tb_name,timetype loop
select count(*) into part_count from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname = tb_options.tb_name;
select part_count - const_part into x;
while x > 0 loop
select min(a.partition_name) into min_part_name from v_gp_range_partition_meta a join pg_class b on a.table_name=b.oid where b.relname = tb_options.tb_name;
execute 'alter table ' || tb_options.tb_name || ' drop partition ' || min_part_name;
x = x -1;
end loop;
part_count = 0;
min_part_name = '';
x = 0;
end loop;
return 'ok';
end;
$$
LANGUAGE plpgsql;
调用声明好的存储过程,如下:
testdb=# select add_partition_day();
add_partition_day
-------------------
ok
(1 row)
testdb=# select drop_partition_day();
drop_partition_day
--------------------
ok
(1 row)
转载于:https://blog.51cto.com/quenlang/1827580