1.pg_bulkload工具介绍
pg_bulkload 是一个高速数据库加载工具,主要用于将大量数据加载到数据库中。
相比 copy 命令最大的优势就是速度。
优势在让我们跳过 shared buffer、wal buffer,直接写文件。
pg_bulkload 主要包括两个模块:reader 和 writer。
reader 负责读取文件、解析 tuple,
writer 负责把解析出的 tuple 写入输出源中。
pg_bulkload 最初的版本功能很简单,只是加载数据,更快地替代 COPY 命令,
pg_bulkload3.0 版本或更高版本具有一些 ETL 功能,如输入数据验证和带有过滤函数的数据转换。
2.软件使用
tar xvf pg_bulkload-3.1.20.tar.gz
cd pg_bulkload-3.1.20
make
make install
3.安装插件
[pgsql@oracle1:/home/pgsql]$psql -h 192.168.1.10 -U postgres -d sspudb
psql (15.3)
Type "help" for help.
sspudb=# create extension pg_bulkload;
CREATE EXTENSION
sspudb=# exit
[pgsql@oracle1:/home/pgsql]$psql -h 192.168.1.10 -U postgres -d postgres
psql (15.3)
Type "help" for help.
postgres=# create extension pg_bulkload;
CREATE EXTENSION
4.数据准备
create table sspudb800w(id int,name text ,writetime timestamp(0));
insert into sspudb800w select generate_series(1,8000000),md5(random()::text),clock_timestamp();
create index i_sspudb800w_id on sspudb800w(id);
sspudb=# create table sspudb800w(id int,name text ,writetime timestamp(0));
CREATE TABLE
sspudb=# insert into sspudb800w select generate_series(1,8000000),md5(random()::text),clock_timestamp();
INSERT 0 8000000
sspudb=# create index i_sspudb800w_id on sspudb800w(id);
CREATE INDEX
5.导出并使用Pg_bulkload导入
sspudb=# copy sspudb800w to '/postgresql/backup/sspudb800w.csv' with(format 'csv');
COPY 8000000
--导入
--O:表名
sspudb=# truncate table sspudb800w;
TRUNCATE TABLE
pg_bulkload -i /postgresql/backup/sspudb800w.csv -O sspudb800w -l /postgresql/backup/sspudb800w.log -h 192.168.1.10 -p 5432 -U postgres -d sspudb -o "TYPE=CSV" -o "TRUNCATE=YES"
[pgsql@oracle1:/home/pgsql]$pg_bulkload -i /postgresql/backup/sspudb800w.csv -O sspudb800w -l /postgresql/backup/sspudb800w.log -h 192.168.1.10 -p 5432 -U postgres -d sspudb -o "TYPE=CSV" -o "TRUNCATE=YES"
NOTICE: BULK LOAD START
NOTICE: BULK LOAD END
0 Rows skipped.
8000000 Rows successfully loaded.
0 Rows not loaded due to parse errors.
0 Rows not loaded due to duplicate errors.
0 Rows replaced with new rows.
--并行写入有问题。
-o "WRITER=PARALLEL"
-o "WRITER=DIRECT" --修改为DIRECT重新导入。
[pgsql@oracle1:/home/pgsql]$pg_bulkload -i /postgresql/backup/sspudb800w.csv -O sspudb800w -l /postgresql/backup/sspudb800w.log -h 192.168.1.10 -p 5432 -U postgres -d sspudb -o "TYPE=CSV" -o "WRITER=DIRECT" -o "TRUNCATE=YES"
NOTICE: BULK LOAD START
NOTICE: BULK LOAD END
0 Rows skipped.
8000000 Rows successfully loaded.
0 Rows not loaded due to parse errors.
0 Rows not loaded due to duplicate errors.
0 Rows replaced with new rows.
sspudb=# select count(1) from sspudb800w;
count
---------
8000000
(1 row)