平台检测:
# Validate OS configuration across the cluster hosts listed in ../conf/hostlist.
# NOTE(review): -m/-s appear to name the master (cloudwave0) and standby
# (cloudwave3) hosts — confirm against the gpcheckos utility reference.
./gpcheckos -f ../conf/hostlist -m cloudwave0 -s cloudwave3
-- Create the working database from the default template.
CREATE DATABASE cloudwave TEMPLATE template1;
创建用户
-- Create a superuser login role for the tests.
-- NOTE(review): UNENCRYPTED PASSWORD stores the password in plain text in
-- the system catalog — acceptable only in a throwaway benchmark cluster.
CREATE ROLE cloud SUPERUSER LOGIN UNENCRYPTED PASSWORD 'cloud';
-- copy 命令插入数据
-- Bulk-load pipe-delimited rows into test; empty string is read as NULL.
COPY test FROM '/usr/local/cloudwave_sample/output/bigdata.tbl'
WITH DELIMITER '|' NULL AS '';
-- 创建 appendonly 表
-- Row-oriented append-only table with level-5 compression,
-- populated from the external table.
CREATE TABLE test_app10
WITH (appendonly = true, compresslevel = 5)
AS SELECT * FROM bigdata_ext10;

-- Column-oriented compressed table, distributed by the ingest timestamp (rksj).
CREATE TABLE public.bigdata_coldis
WITH (appendonly = true, orientation = column, compresslevel = 5)
AS SELECT * FROM bigdata_ext10
DISTRIBUTED BY (rksj);

-- Column-oriented table WITHOUT compression, distributed by id,
-- for comparing storage footprint against the compressed variant.
CREATE TABLE public.bigdata_colnocompress
WITH (appendonly = true, orientation = column)
AS SELECT * FROM bigdata_ext10
DISTRIBUTED BY (id);
-- 注意 with 子句与 distributed 子句的放置位置。
-- 列存储加压缩压缩率挺高:157GB 数据,行存压缩为 57GB,列存压缩后为 37GB
-- 创建索引
-- B-tree index on bigdata(id).
CREATE INDEX idx_t ON bigdata (id);

-- B-tree index on the name column of the column-store table.
CREATE INDEX btree_name_idx ON bigdata_coldis (name);
创建 Bitmap 索引
-- Bitmap index: well suited to low-cardinality columns such as age.
CREATE INDEX bmap_age_idx ON bigdata_coldis USING bitmap (age);
启动gpfdist服务
# Start the gpfdist file server in the background, serving the sample-data
# directory on port 8888; stdout/stderr go to /tmp/gpfdist.log.
nohup ./gpfdist -d /usr/local/cloudwave_sample/output -p 8888 > /tmp/gpfdist.log 2>&1 &
-- 创建外部表
-- Readable external table over bigdata.tbl served by gpfdist.
-- Malformed rows are logged to public.err_log; the load is aborted
-- once more than 10 rows have been rejected per segment.
CREATE EXTERNAL TABLE public.bigdata_ext10 (
    id      BIGINT,
    name    VARCHAR(10),
    zjhm    CHAR(18),
    age     INTEGER,
    phone   VARCHAR(24),
    score   NUMERIC(12, 3),
    birth   DATE,
    rksj    TIMESTAMP,
    jiguan  VARCHAR(64),
    address VARCHAR(255)
)
LOCATION ('gpfdist://192.168.0.13:8888/bigdata.tbl')
FORMAT 'TEXT' (DELIMITER '|' NULL '' ESCAPE 'OFF')
ENCODING 'UNICODE'
LOG ERRORS INTO public.err_log SEGMENT REJECT LIMIT 10 ROWS;
-- 注:gpfdist 命令的 -d 参数指定的目录与 Location 指定的文件路径共同构成文件的绝对路径
修改参数:需要重启使内存参数生效
# Raise shared_buffers cluster-wide (a restart is required for this to take effect).
gpconfig -c shared_buffers -v 1536MB
# Extend the segment connection timeout to one hour.
gpconfig -c gp_segment_connect_timeout -v 1h
greenplum数据库可优化参数:
effective_cache_size 4GB
gp_resqueue_priority_cpucores_per_segment 4
shared_buffers 768MB
temp_buffers 2048
gp_vmem_protect_limit 8192
gp_vmem_protect_segworker_cache_limit 512
work_mem 512MB
maintenance_work_mem 2GB
optimizer off
pljava_statement_cache_size 512
transaction_read_only on
gp_max_local_distributed_cache 1024
vacuum_freeze_min_age 100000000
statement_mem 125MB
max_statement_mem 2000MB
max_work_mem 2000MB
max_fsm_pages 200000
gp_workfile_limit_files_per_query 100000
gp_autostats_on_change_threshold 2147483647
gp_resqueue_memory_policy eager_free, auto, none
表级优化:
数据压缩
分区表
bigdata_coldis 以rksj列为分布列的 列存储 10亿数据表
-- Check data skew: row count per segment for the rksj-distributed table.
SELECT gp_segment_id, COUNT(*)
FROM bigdata_coldis
GROUP BY gp_segment_id
ORDER BY gp_segment_id;
gp_segment_id | count
---------------+-----------
0 | 166666524
1 | 166678397
2 | 166643918
3 | 166664841
4 | 166682321
5 | 166663999
gp查询sql的执行时间与所选列多少有关:
-- Single-column point query; LIMIT caps the result set returned to the client.
SELECT zjhm
FROM bigdata_coldis
WHERE age = 15
  AND score = 6468.499
LIMIT 20;
1列:51.9s,,104s(67s)
2列:70s 103s
3列: 141s
4列: 142s
5列: 190s
6列: 194s
7列: 227s
8列: 282s
9列: 302s
10列: 389s
* 391s
gp测试过程总结:
1、gp sql需要加limit限制结果集,否则会取全部结果集
2、gp select列不能太多 列多与列少性能表现不一样
3、范围过滤性能好于等值过滤性能
4、加索引性能好于不加索引
5、optimizer慎重开启 最好手动优化sql
6、shared_buffer不宜设置过大
# NOTE(review): everything from here to the end of the file duplicates the
# first half of these notes verbatim — consider removing the duplicate section.
# Validate OS configuration across the cluster hosts listed in ../conf/hostlist.
./gpcheckos -f ../conf/hostlist -m cloudwave0 -s cloudwave3
-- Create the working database from the default template.
CREATE DATABASE cloudwave TEMPLATE template1;
创建用户
-- Create a superuser login role for the tests.
-- NOTE(review): UNENCRYPTED PASSWORD stores the password in plain text in
-- the system catalog — acceptable only in a throwaway benchmark cluster.
CREATE ROLE cloud SUPERUSER LOGIN UNENCRYPTED PASSWORD 'cloud';
-- copy 命令插入数据
-- Bulk-load pipe-delimited rows into test; empty string is read as NULL.
COPY test FROM '/usr/local/cloudwave_sample/output/bigdata.tbl'
WITH DELIMITER '|' NULL AS '';
-- 创建 appendonly 表
-- Row-oriented append-only table with level-5 compression,
-- populated from the external table.
CREATE TABLE test_app10
WITH (appendonly = true, compresslevel = 5)
AS SELECT * FROM bigdata_ext10;

-- Column-oriented compressed table, distributed by the ingest timestamp (rksj).
CREATE TABLE public.bigdata_coldis
WITH (appendonly = true, orientation = column, compresslevel = 5)
AS SELECT * FROM bigdata_ext10
DISTRIBUTED BY (rksj);

-- Column-oriented table WITHOUT compression, distributed by id,
-- for comparing storage footprint against the compressed variant.
CREATE TABLE public.bigdata_colnocompress
WITH (appendonly = true, orientation = column)
AS SELECT * FROM bigdata_ext10
DISTRIBUTED BY (id);
-- 注意 with 子句与 distributed 子句的放置位置。
-- 列存储加压缩压缩率挺高:157GB 数据,行存压缩为 57GB,列存压缩后为 37GB
-- 创建索引
-- B-tree index on bigdata(id).
CREATE INDEX idx_t ON bigdata (id);

-- B-tree index on the name column of the column-store table.
CREATE INDEX btree_name_idx ON bigdata_coldis (name);
创建 Bitmap 索引
-- Bitmap index: well suited to low-cardinality columns such as age.
CREATE INDEX bmap_age_idx ON bigdata_coldis USING bitmap (age);
启动gpfdist服务
# Start the gpfdist file server in the background, serving the sample-data
# directory on port 8888; stdout/stderr go to /tmp/gpfdist.log.
nohup ./gpfdist -d /usr/local/cloudwave_sample/output -p 8888 > /tmp/gpfdist.log 2>&1 &
-- 创建外部表
-- Readable external table over bigdata.tbl served by gpfdist.
-- Malformed rows are logged to public.err_log; the load is aborted
-- once more than 10 rows have been rejected per segment.
CREATE EXTERNAL TABLE public.bigdata_ext10 (
    id      BIGINT,
    name    VARCHAR(10),
    zjhm    CHAR(18),
    age     INTEGER,
    phone   VARCHAR(24),
    score   NUMERIC(12, 3),
    birth   DATE,
    rksj    TIMESTAMP,
    jiguan  VARCHAR(64),
    address VARCHAR(255)
)
LOCATION ('gpfdist://192.168.0.13:8888/bigdata.tbl')
FORMAT 'TEXT' (DELIMITER '|' NULL '' ESCAPE 'OFF')
ENCODING 'UNICODE'
LOG ERRORS INTO public.err_log SEGMENT REJECT LIMIT 10 ROWS;
-- 注:gpfdist 命令的 -d 参数指定的目录与 Location 指定的文件路径共同构成文件的绝对路径
修改参数:需要重启使内存参数生效
# Raise shared_buffers cluster-wide (a restart is required for this to take effect).
gpconfig -c shared_buffers -v 1536MB
# Extend the segment connection timeout to one hour.
gpconfig -c gp_segment_connect_timeout -v 1h
greenplum数据库可优化参数:
effective_cache_size 4GB
gp_resqueue_priority_cpucores_per_segment 4
shared_buffers 768MB
temp_buffers 2048
gp_vmem_protect_limit 8192
gp_vmem_protect_segworker_cache_limit 512
work_mem 512MB
maintenance_work_mem 2GB
optimizer off
pljava_statement_cache_size 512
transaction_read_only on
gp_max_local_distributed_cache 1024
vacuum_freeze_min_age 100000000
statement_mem 125MB
max_statement_mem 2000MB
max_work_mem 2000MB
max_fsm_pages 200000
gp_workfile_limit_files_per_query 100000
gp_autostats_on_change_threshold 2147483647
gp_resqueue_memory_policy eager_free, auto, none
表级优化:
数据压缩
分区表
bigdata_coldis 以rksj列为分布列的 列存储 10亿数据表
-- Check data skew: row count per segment for the rksj-distributed table.
SELECT gp_segment_id, COUNT(*)
FROM bigdata_coldis
GROUP BY gp_segment_id
ORDER BY gp_segment_id;
gp_segment_id | count
---------------+-----------
0 | 166666524
1 | 166678397
2 | 166643918
3 | 166664841
4 | 166682321
5 | 166663999
gp查询sql的执行时间与所选列多少有关:
-- Single-column point query; LIMIT caps the result set returned to the client.
SELECT zjhm
FROM bigdata_coldis
WHERE age = 15
  AND score = 6468.499
LIMIT 20;
1列:51.9s,,104s(67s)
2列:70s 103s
3列: 141s
4列: 142s
5列: 190s
6列: 194s
7列: 227s
8列: 282s
9列: 302s
10列: 389s
* 391s
gp测试过程总结:
1、gp sql需要加limit限制结果集,否则会取全部结果集
2、gp select列不能太多 列多与列少性能表现不一样
3、范围过滤性能好于等值过滤性能
4、加索引性能好于不加索引
5、optimizer慎重开启 最好手动优化sql
6、shared_buffer不宜设置过大