此次实验基于以下网站:
alibabacloud-hologres-connectors/holo-shipper at master · aliyun/alibabacloud-hologres-connectors · GitHubHologres基于TPCH的性能测试介绍_阿里云开发者的博客-程序员ITS201 - 程序员ITS201
实验目的:测试迁移工具holo-shipper
实验结果:将一个Holo实例中的所有表ship到另一个Holo实例中
环境准备
1. 准备数据生成工具
- 安装 git
yum install git
- 安装gcc
yum install gcc
- 下载 TPC-H 数据生成代码
git clone https://github.com/gregrahn/tpch-kit.git
- 进入数据生成工具代码目录
cd tpch-kit/dbgen
- 编译数据生成工具代码
make
2. 生成数据
- 编译成功后,您可以使用如下代码查看代码生成工具的相关参数。
./dbgen --help
- 本次测试仅生成 1 GB 数据,所以运行如下代码生成数据。
./dbgen -vf -s 1
准备数据
1. 创建表
- 由于本文主要使用 psql 进行数据导入操作,需要先在 ECS 中运行如下命令安装 psql
yum install postgresql-server
- 安装 psql 后,您可以使用如下命令登陆 Hologres 实例
PGUSER=<AccessID> PGPASSWORD=<AccessKey> psql -p <Port> -h <Endpoint> -d <Database>
- 使用psql连接Hologres后,您可以使用如下建表语句创建数据库表
DROP TABLE IF EXISTS LINEITEM;
BEGIN;
CREATE TABLE LINEITEM
(
L_ORDERKEY INT NOT NULL,
L_PARTKEY INT NOT NULL,
L_SUPPKEY INT NOT NULL,
L_LINENUMBER INT NOT NULL,
L_QUANTITY DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
L_DISCOUNT DECIMAL(15,2) NOT NULL,
L_TAX DECIMAL(15,2) NOT NULL,
L_RETURNFLAG TEXT NOT NULL,
L_LINESTATUS TEXT NOT NULL,
L_SHIPDATE TIMESTAMPTZ NOT NULL,
L_COMMITDATE TIMESTAMPTZ NOT NULL,
L_RECEIPTDATE TIMESTAMPTZ NOT NULL,
L_SHIPINSTRUCT TEXT NOT NULL,
L_SHIPMODE TEXT NOT NULL,
L_COMMENT TEXT NOT NULL,
PRIMARY KEY (L_ORDERKEY,L_LINENUMBER)
);
CALL set_table_property('LINEITEM', 'clustering_key', 'L_SHIPDATE,L_ORDERKEY');
CALL set_table_property('LINEITEM', 'segment_key', 'L_SHIPDATE');
CALL set_table_property('LINEITEM', 'distribution_key', 'L_ORDERKEY');
CALL set_table_property('LINEITEM', 'bitmap_columns', 'L_ORDERKEY,L_PARTKEY,L_SUPPKEY,L_LINENUMBER,L_RETURNFLAG,L_LINESTATUS,L_SHIPINSTRUCT,L_SHIPMODE,L_COMMENT');
CALL set_table_property('LINEITEM', 'dictionary_encoding_columns', 'L_RETURNFLAG,L_LINESTATUS,L_SHIPINSTRUCT,L_SHIPMODE,L_COMMENT');
CALL set_table_property('LINEITEM', 'time_to_live_in_seconds', '31536000');
COMMIT;
DROP TABLE IF EXISTS ORDERS;
BEGIN;
CREATE TABLE ORDERS
(
O_ORDERKEY INT NOT NULL PRIMARY KEY,
O_CUSTKEY INT NOT NULL,
O_ORDERSTATUS TEXT NOT NULL,
O_TOTALPRICE DECIMAL(15,2) NOT NULL,
O_ORDERDATE timestamptz NOT NULL,
O_ORDERPRIORITY TEXT NOT NULL,
O_CLERK TEXT NOT NULL,
O_SHIPPRIORITY INT NOT NULL,
O_COMMENT TEXT NOT NULL
);
CALL set_table_property('ORDERS', 'segment_key', 'O_ORDERDATE');
CALL set_table_property('ORDERS', 'distribution_key', 'O_ORDERKEY');
CALL set_table_property('ORDERS', 'colocate_with', 'LINEITEM');
CALL set_table_property('ORDERS', 'bitmap_columns', 'O_ORDERKEY,O_CUSTKEY,O_ORDERSTATUS,O_ORDERPRIORITY,O_CLERK,O_SHIPPRIORITY,O_COMMENT');
CALL set_table_property('ORDERS', 'dictionary_encoding_columns', 'O_ORDERSTATUS,O_ORDERPRIORITY,O_CLERK,O_COMMENT');
CALL set_table_property('ORDERS', 'time_to_live_in_seconds', '31536000');
COMMIT;
DROP TABLE IF EXISTS PARTSUPP;
BEGIN;
CREATE TABLE PARTSUPP
(
PS_PARTKEY INT NOT NULL,
PS_SUPPKEY INT NOT NULL,
PS_AVAILQTY INT NOT NULL,
PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
PS_COMMENT TEXT NOT NULL,
PRIMARY KEY(PS_PARTKEY,PS_SUPPKEY)
);
CALL set_table_property('PARTSUPP', 'distribution_key', 'PS_PARTKEY');
CALL set_table_property('PARTSUPP', 'colocate_with', 'LINEITEM');
CALL set_table_property('PARTSUPP', 'bitmap_columns', 'PS_PARTKEY,PS_SUPPKEY,PS_AVAILQTY,PS_COMMENT');
CALL set_table_property('PARTSUPP', 'dictionary_encoding_columns', 'PS_COMMENT');
CALL set_table_property('PARTSUPP', 'time_to_live_in_seconds', '31536000');
COMMIT;
DROP TABLE IF EXISTS PART;
BEGIN;
CREATE TABLE PART
(
P_PARTKEY INT NOT NULL PRIMARY KEY,
P_NAME TEXT NOT NULL,
P_MFGR TEXT NOT NULL,
P_BRAND TEXT NOT NULL,
P_TYPE TEXT NOT NULL,
P_SIZE INT NOT NULL,
P_CONTAINER TEXT NOT NULL,
P_RETAILPRICE DECIMAL(15,2) NOT NULL,
P_COMMENT TEXT NOT NULL
);
CALL set_table_property('PART', 'distribution_key', 'P_PARTKEY');
CALL set_table_property('PART', 'colocate_with', 'LINEITEM');
CALL set_table_property('PART', 'bitmap_columns', 'P_PARTKEY,P_SIZE,P_NAME,P_MFGR,P_BRAND,P_TYPE,P_CONTAINER,P_COMMENT');
CALL set_table_property('PART', 'dictionary_encoding_columns', 'P_NAME,P_MFGR,P_BRAND,P_TYPE,P_CONTAINER,P_COMMENT');
CALL set_table_property('PART', 'time_to_live_in_seconds', '31536000');
COMMIT;
DROP TABLE IF EXISTS CUSTOMER;
BEGIN;
CREATE TABLE CUSTOMER
(
C_CUSTKEY INT NOT NULL PRIMARY KEY,
C_NAME TEXT NOT NULL,
C_ADDRESS TEXT NOT NULL,
C_NATIONKEY INT NOT NULL,
C_PHONE TEXT NOT NULL,
C_ACCTBAL DECIMAL(15,2) NOT NULL,
C_MKTSEGMENT TEXT NOT NULL,
C_COMMENT TEXT NOT NULL
);
CALL set_table_property('CUSTOMER', 'distribution_key', 'C_CUSTKEY');
CALL set_table_property('CUSTOMER', 'colocate_with', 'LINEITEM');
CALL set_table_property('CUSTOMER', 'bitmap_columns', 'C_CUSTKEY,C_NATIONKEY,C_NAME,C_ADDRESS,C_PHONE,C_MKTSEGMENT,C_COMMENT');
CALL set_table_property('CUSTOMER', 'dictionary_encoding_columns', 'C_NAME,C_ADDRESS,C_PHONE,C_MKTSEGMENT,C_COMMENT');
CALL set_table_property('CUSTOMER', 'time_to_live_in_seconds', '31536000');
COMMIT;
DROP TABLE IF EXISTS SUPPLIER;
BEGIN;
CREATE TABLE SUPPLIER
(
S_SUPPKEY INT NOT NULL PRIMARY KEY,
S_NAME TEXT NOT NULL,
S_ADDRESS TEXT NOT NULL,
S_NATIONKEY INT NOT NULL,
S_PHONE TEXT NOT NULL,
S_ACCTBAL DECIMAL(15,2) NOT NULL,
S_COMMENT TEXT NOT NULL
);
CALL set_table_property('SUPPLIER', 'distribution_key', 'S_SUPPKEY');
CALL set_table_property('SUPPLIER', 'colocate_with', 'LINEITEM');
CALL set_table_property('SUPPLIER', 'bitmap_columns', 'S_SUPPKEY,S_NAME,S_ADDRESS,S_NATIONKEY,S_PHONE,S_COMMENT');
CALL set_table_property('SUPPLIER', 'dictionary_encoding_columns', 'S_NAME,S_ADDRESS,S_PHONE,S_COMMENT');
CALL set_table_property('SUPPLIER', 'time_to_live_in_seconds', '31536000');
COMMIT;
DROP TABLE IF EXISTS NATION;
BEGIN;
CREATE TABLE NATION(
N_NATIONKEY INT NOT NULL PRIMARY KEY,
N_NAME text NOT NULL,
N_REGIONKEY INT NOT NULL,
N_COMMENT text NOT NULL
);
CALL set_table_property('NATION', 'distribution_key', 'N_NATIONKEY');
CALL set_table_property('NATION', 'colocate_with', 'LINEITEM');
CALL set_table_property('NATION', 'bitmap_columns', 'N_NATIONKEY,N_NAME,N_REGIONKEY,N_COMMENT');
CALL set_table_property('NATION', 'dictionary_encoding_columns', 'N_NAME,N_COMMENT');
CALL set_table_property('NATION', 'time_to_live_in_seconds', '31536000');
COMMIT;
DROP TABLE IF EXISTS REGION;
BEGIN;
CREATE TABLE REGION
(
R_REGIONKEY INT NOT NULL PRIMARY KEY,
R_NAME TEXT NOT NULL,
R_COMMENT TEXT
);
CALL set_table_property('REGION', 'distribution_key', 'R_REGIONKEY');
CALL set_table_property('REGION', 'colocate_with', 'LINEITEM');
CALL set_table_property('REGION', 'bitmap_columns', 'R_REGIONKEY,R_NAME,R_COMMENT');
CALL set_table_property('REGION', 'dictionary_encoding_columns', 'R_NAME,R_COMMENT');
CALL set_table_property('REGION', 'time_to_live_in_seconds', '31536000');
COMMIT;
- 创建完毕后,您能在 psql 中使用如下代码查看是否创建成功
tpch_1sf=# \dt
- 若成功,现实效果如下
tpch_1sf=# \dt
List of relations
Schema | Name | Type | Owner
--------+----------+-------+--------------------
public | customer | table | tpch_1sf_developer
public | lineitem | table | tpch_1sf_developer
public | nation | table | tpch_1sf_developer
public | orders | table | tpch_1sf_developer
public | part | table | tpch_1sf_developer
public | partsupp | table | tpch_1sf_developer
public | region | table | tpch_1sf_developer
public | supplier | table | tpch_1sf_developer
(8 rows)
2. 导入数据
- 本测试方案主要使用
COPY FROM STDIN
的方式导入数据详细可以参考官方文档。此处会将此前生成的 tbl 数据文件导入 Hologres 中创建的表中。 - 您可以在数据生成工具的目录中参考如下
shell
脚本导入数据
for i in `ls *.tbl`; do
echo $i;
name=`echo $i| cut -d'.' -f1`;
PGUSER=<AccessID> PGPASSWORD=<AccessKey> psql -p <Port> -h <Endpoint> -d <Database> -c "COPY $name from stdin with delimiter '|' csv;" < $i;
done
- 至此您已完成数据导入
3. 收集统计信息
- 为了更好的执行查询,可以在 psql 中使用如下语句,使 Hologres 收集各张表特征信息。
vacuum region;
vacuum nation;
vacuum supplier;
vacuum customer;
vacuum part;
vacuum partsupp;
vacuum orders;
vacuum lineitem;
analyze nation;
analyze region;
analyze lineitem;
analyze orders;
analyze customer;
analyze part;
analyze partsupp;
analyze supplier;
迁移开始
- 源holo实例数据准备完成后,开始执行迁移数据到目的holo
- 源holo和目的holo数据对比