--dw_est_act_acct_item_02_yyyymmdd.dat 8.1G 记录条数 83900798
--split -l 20975200 -d -a 1 dw_est_act_acct_item_02_yyyymmdd.dat aa
--log errors into err_ext_dw_est_04_ext segment reject limit 10 rows;
CREATE EXTERNAL TABLE ext_dw_est_04
(
serv_id numeric(12,0),
mkt_chnl_id numeric(9,0),
mkt_chnl_name varchar(40),
area_id numeric(9,0),
area_name varchar(40),
product_id numeric(9,0),
acct_item_type_id numeric(9,0),
country_type varchar(1) ,
multi_proudct_mark numeric(3,0),
accounts_resid numeric(3,0),
charge numeric(16,5),
deal_source integer
)
LOCATION (
'gpfdist://136.6.246.132:8081/liuhb/aa0',
'gpfdist://192.168.2.132:8081/liuhb/aa1',
'gpfdist://192.168.3.132:8081/liuhb/aa2',
'gpfdist://192.168.4.132:8081/liuhb/aa3'
)
FORMAT 'text' (delimiter '' null '' escape 'OFF')
ENCODING 'UTF8';
CREATE EXTERNAL TABLE ext_dw_est_01
(
serv_id numeric(12,0),
mkt_chnl_id numeric(9,0),
mkt_chnl_name varchar(40),
area_id numeric(9,0),
area_name varchar(40),
product_id numeric(9,0),
acct_item_type_id numeric(9,0),
country_type varchar(1) ,
multi_proudct_mark numeric(3,0),
accounts_resid numeric(3,0),
charge numeric(16,5),
deal_source integer
)
LOCATION (
'gpfdist://192.168.2.132:8081/liuhb/dw_est_act_acct_item_02_yyyymmdd.dat'
)
FORMAT 'text' (delimiter '' null '' escape 'OFF')
ENCODING 'UTF8';
--83900798 139156 137094 159250
select count(*) from ext_dw_est_01;
--83900798 41422 53328 58703 19016 75640 18968 46312
select count(*) from ext_dw_est_04;
--总结:
我测试的系统在master节点有4张网卡与segment连接,可以根据master的网卡数分割文件后可以达到比较高的装载速度。如果IO在装载不是瓶颈时,我们就可以从网络入手分析。对于并行装载的问题,官方文档上说需要分割成与segment匹配的文件数。在可以利用到并行。这个需要测试验证。