2018年11月5日 于南昌高新区 临近项目上线
最近在做一个银行的项目,银行存量交易很大,比如省银行下面有1亿多用户,行方将数据存放到ftp,我们下载下来,读文件入库。
用Scanner读取文件,将每一行的数据,加入到list中,然后调用MyBatis的insertBatch方法,插入到数据库,举个栗子
/**
 * Loads the daily cost/revenue-allocation data file (GBK encoded, columns
 * separated by the \x03 control character) for the given date into the
 * temporary table, committing in batches of 1000 rows.
 *
 * Fixes versus the previous revision:
 *  - the Scanner/FileInputStream are closed in a finally block, so they no
 *    longer leak when parsing or inserting throws;
 *  - elapsed time is printed as (end - start); it used to print the negative
 *    (start - end);
 *  - a parse failure now records the raw failing line exactly once; it used
 *    to record the PREVIOUS successfully parsed row and re-insert every
 *    earlier problem row on each new failure;
 *  - any rows still buffered when the loop ends are flushed, so the tail
 *    batch is no longer dropped when the declared count is a multiple of the
 *    batch size or disagrees with the file contents (the old
 *    "y > 0 && inum == datasize" guard).
 *
 * @param fileEndDate business date (yyyyMMdd) used to build the flag/data file names
 * @throws IOException if the data file cannot be opened
 */
public void addScCostRevAlctFromFile(String fileEndDate) throws IOException {
	String currentDate = fileEndDate; // previously DateUtils.formatDate(new Date(), "yyyyMMdd")
	// <local-dir>/<date>/<prefix><date>.flg : metadata (row count, file size)
	String dataFileFlagName = LoyParam.DOWN_LOCAL_PATH_BEFORE.concat(currentDate)
			.concat(LoyParam.DOWN_LOCAL_PATH_AFTER).concat(LoyParam.FILE_CIR_CORE_LOAN_COST_REV_ALCT_)
			.concat(currentDate).concat(LoyParam.DOWN_FILENAME_FLG);
	// <local-dir>/<date>/<prefix><date>.dat : the data itself
	String dataFileName = LoyParam.DOWN_LOCAL_PATH_BEFORE.concat(currentDate).concat(LoyParam.DOWN_LOCAL_PATH_AFTER)
			.concat(LoyParam.FILE_CIR_CORE_LOAN_COST_REV_ALCT_).concat(currentDate)
			.concat(LoyParam.DOWN_FILENAME_DAT);
	// Give up quietly unless both the flag file and the data file are present.
	if (!(FileUtil.isExistence(dataFileFlagName) && FileUtil.isExistence(dataFileName))) {
		return;
	}
	Map<String, String> fileMap = this.getDataFlagInfo(dataFileFlagName);
	if (fileMap.isEmpty()) {
		logger.info("要访问的文件不存在" + dataFileFlagName);
		return;
	}
	// Row count declared by the flag file; stays 0 when the field is absent.
	int datasize = 0;
	if (!StringCommonUtils.isEmpty(fileMap.get("dataSum"))) {
		datasize = Integer.parseInt(fileMap.get("dataSum"));
		logger.info("数据文件名为:" + fileMap.get("dataFile"));
		logger.info("数据文件大小为:" + fileMap.get("fileSize"));
		logger.info("数据总数为:" + datasize);
	}
	String starttime = formatter.format(new Date());         // timestamp for the ETL audit row
	String currentY_M_D = formatterY_M_D.format(new Date()); // 10-char date stamped on every row
	long start = System.currentTimeMillis();
	// Recreate the backup/temp table before loading into it.
	this.deleteAndCreateTmpTable(LoyParam.TABLE_SC_COST_REV_ALCT);
	final int DB_COMMIT_COUNT = 1000; // rows per batch insert
	int inum = 0;                     // data lines processed so far
	List<SCT> ststlist = new ArrayList<SCT>(DB_COMMIT_COUNT);
	List<SCT> problemList = new ArrayList<SCT>();
	BigDecimal zeroBigDecimal = new BigDecimal(0);
	FileInputStream fis = new FileInputStream(dataFileName);
	Scanner sc = new Scanner(fis, "GBK");
	try {
		while (sc.hasNextLine()) {
			String line = sc.nextLine();
			String[] data = line.split("\\x03");
			if (data.length == 0) {
				continue;
			}
			inum++;
			// Index the cleaned columns; a short row yields null from
			// map.get(i) instead of an ArrayIndexOutOfBoundsException.
			Map<Integer, String> map = new HashMap<Integer, String>();
			for (int i = 0; i < data.length; i++) {
				map.put(i, StringCommonUtils.replaceBlank(data[i]));
			}
			ScCostRevAlct scCost;
			try {
				String cstId1 = StringFormat.trimNull(map.get(0));
				String yearMonth1 = StringFormat.trimNull(map.get(1));
				String cklr1 = StringFormat.trimNull(map.get(2));
				String dklr1 = StringFormat.trimNull(map.get(3));
				String zjywlr1 = StringFormat.trimNull(map.get(4));
				String ckxc1 = StringFormat.trimNull(map.get(5));
				String dkxc1 = StringFormat.trimNull(map.get(6));
				String zjywxc1 = StringFormat.trimNull(map.get(7));
				String zhgxd1 = StringFormat.trimNull(map.get(8));
				String orgId = StringFormat.trimNull(map.get(9));
				// Blank numeric columns default to zero.
				scCost = new ScCostRevAlct(cstId1, yearMonth1,
						StringCommonUtils.isEmpty(cklr1) ? zeroBigDecimal : new BigDecimal(cklr1),
						StringCommonUtils.isEmpty(dklr1) ? zeroBigDecimal : new BigDecimal(dklr1),
						StringCommonUtils.isEmpty(zjywlr1) ? zeroBigDecimal : new BigDecimal(zjywlr1),
						StringCommonUtils.isEmpty(ckxc1) ? zeroBigDecimal : new BigDecimal(ckxc1),
						StringCommonUtils.isEmpty(dkxc1) ? zeroBigDecimal : new BigDecimal(dkxc1),
						StringCommonUtils.isEmpty(zjywxc1) ? zeroBigDecimal : new BigDecimal(zjywxc1),
						StringCommonUtils.isEmpty(zhgxd1) ? zeroBigDecimal : new BigDecimal(zhgxd1),
						currentY_M_D, orgId);
			} catch (Exception ex) {
				logger.error("转换信息异常" + ex.getMessage(), ex);
				logger.info("========》我出错了 " + inum);
				// Record the raw failing line (the old code stored the
				// previous successfully parsed row, which was misleading).
				ScProblemRecordPojo problemPojo = new ScProblemRecordPojo(
						line,
						DateUtils.YYYYMMDD,
						DateUtils.YMDHMS,
						dataFileName,
						Integer.toString(inum));
				problemList.add(problemPojo);
				this.scCredTmTxnHstService.insertBatch(problemList, dataFileName);
				// Clear so the next failure does not re-insert earlier rows.
				problemList.clear();
				continue;
			}
			ststlist.add(scCost);
			if (ststlist.size() == DB_COMMIT_COUNT) {
				scCredTmTxnHstService.insertBatch(ststlist, dataFileName);
				ststlist = new ArrayList<SCT>(DB_COMMIT_COUNT);
			}
			if (inum == datasize) {
				// Reached the declared row count; ignore any trailing lines.
				break;
			}
		}
	} finally {
		// Always release the file handles; the old code leaked them whenever
		// parsing or a batch insert threw.
		sc.close();
		fis.close();
	}
	// Flush the tail batch (< DB_COMMIT_COUNT rows) left in the buffer.
	if (!ststlist.isEmpty()) {
		scCredTmTxnHstService.insertBatch(ststlist, dataFileName);
	}
	logger.info("解析数据完成!共计" + inum + "条数据");
	long end = System.currentTimeMillis();
	// Bug fix: was (start - end), which printed a negative duration.
	System.out.println("时长为:---------------" + (end - start) + "毫秒---------------");
	if (inum == datasize) {
		// Processed count matches the declared count: delete the source file,
		// write the ETL audit row and run the post-load success processing.
		FileOperate.delete(dataFileName);
		scEngineeEtlmxService.addScEngineeEtlmx(LoyParam.FILE_CIR_CORE_LOAN_COST_REV_ALCT_,
				"数仓数据文件之" + dataFileName, starttime,
				formatter.format(new Date()), StringFormat.msecondToTime(end - start), currentY_M_D, datasize, inum,
				currentY_M_D, "Y");
		this.insertDBSuccessProcessTable(LoyParam.TABLE_SC_COST_REV_ALCT);
	}
}
这其中用到了批量插入,最后实测,如果批量200条,连接一次数据库,做插入的话,5000万条数据,需要24个小时,妈呀,太长了。如果增加批量的条数呢?比如增加到1000条提交一次呢?
<!--
  Multi-row batch insert into the merchant base-info temp table
  (SC_TBL_MCHT_BASE_INF_TMP). <foreach> expands the "list" parameter into one
  VALUES tuple per record, so the generated statement's length grows linearly
  with the batch size. NOTE(review): per the surrounding write-up, DB2 rejects
  the statement ("distribution protocol error") once it exceeds the server's
  statement/buffer limit - keep the caller's batch size in line with the
  configured BUFFPAGE value.
-->
<insert id="insertBatch" parameterType="java.util.List">
insert into SC_TBL_MCHT_BASE_INF_TMP (MCHT_NO, MCHT_NM, RISL_LVL,
MCHT_LVL, MCHT_STATUS, MANU_AUTH_FLAG,
PART_NUM, DISC_CONS_FLG, DISC_CONS_REBATE,
PASS_FLAG, OPEN_DAYS, SLEEP_DAYS,
MCHT_CN_ABBR, SPELL_NAME, ENG_NAME,
MCHT_EN_ABBR, AREA_NO, SETTLE_AREA_NO,
ADDR, HOME_PAGE, MCC,
TCC, ETPS_ATTR, MNG_MCHT_ID,
MCHT_GRP, MCHT_ATTR, MCHT_GROUP_FLAG,
MCHT_GROUP_ID, MCHT_ENG_NM, MCHT_ENG_ADDR,
MCHT_ENG_CITY_NAME, SA_LIMIT_AMT, SA_ACTION,
PSAM_NUM, CD_MAC_NUM, POS_NUM,
CONN_TYPE, MCHT_MNG_MODE, MCHT_FUNCTION,
LICENCE_NO, LICENCE_END_DATE, BANK_LICENCE_NO,
BUS_TYPE, FAX_NO, BUS_AMT,
MCHT_CRE_LVL, CONTACT, POST_CODE,
COMM_EMAIL, COMM_MOBIL, COMM_TEL,
MANAGER, ARTIF_CERTIF_TP, IDENTITY_NO,
MANAGER_TEL, FAX, ELECTROFAX,
REG_ADDR, APPLY_DATE, ENABLE_DATE,
PRE_AUD_NM, CONFIRM_NM, PROTOCAL_ID,
SIGN_INST_ID, NET_NM, AGR_BR,
NET_TEL, PROL_DATE, PROL_TLR,
CLOSE_DATE, CLOSE_TLR, MAIN_TLR,
CHECK_TLR, OPER_NO, OPER_NM,
PROC_FLAG, SET_CUR, PRINT_INST_ID,
ACQ_INST_ID, ACQ_BK_NAME, BANK_NO,
ORGN_NO, SUBBRH_NO, SUBBRH_NM,
OPEN_TIME, CLOSE_TIME, VIS_ACT_FLG,
VIS_MCHT_ID, MST_ACT_FLG, MST_MCHT_ID,
AMX_ACT_FLG, AMX_MCHT_ID, DNR_ACT_FLG,
DNR_MCHT_ID, JCB_ACT_FLG, JCB_MCHT_ID,
CUP_MCHT_FLG, DEB_MCHT_FLG, CRE_MCHT_FLG,
CDC_MCHT_FLG, RESERVED, UPD_OPR_ID,
CRT_OPR_ID, REC_UPD_TS, REC_CRT_TS,
LAST_ETL_ACG_DT, DEL_F,TBDATE)
values
<!-- one (...) tuple per record; property order must match the column list above -->
<foreach collection ="list" item="record" separator =",">
(#{record.mchtNo,jdbcType=VARCHAR}, #{record.mchtNm,jdbcType=VARCHAR}, #{record.rislLvl,jdbcType=VARCHAR},
#{record.mchtLvl,jdbcType=VARCHAR}, #{record.mchtStatus,jdbcType=VARCHAR}, #{record.manuAuthFlag,jdbcType=VARCHAR},
#{record.partNum,jdbcType=VARCHAR}, #{record.discConsFlg,jdbcType=VARCHAR}, #{record.discConsRebate,jdbcType=VARCHAR},
#{record.passFlag,jdbcType=VARCHAR}, #{record.openDays,jdbcType=VARCHAR}, #{record.sleepDays,jdbcType=VARCHAR},
#{record.mchtCnAbbr,jdbcType=VARCHAR}, #{record.spellName,jdbcType=VARCHAR}, #{record.engName,jdbcType=VARCHAR},
#{record.mchtEnAbbr,jdbcType=VARCHAR}, #{record.areaNo,jdbcType=VARCHAR}, #{record.settleAreaNo,jdbcType=VARCHAR},
#{record.addr,jdbcType=VARCHAR}, #{record.homePage,jdbcType=VARCHAR}, #{record.mcc,jdbcType=VARCHAR},
#{record.tcc,jdbcType=VARCHAR}, #{record.etpsAttr,jdbcType=VARCHAR}, #{record.mngMchtId,jdbcType=VARCHAR},
#{record.mchtGrp,jdbcType=VARCHAR}, #{record.mchtAttr,jdbcType=VARCHAR}, #{record.mchtGroupFlag,jdbcType=VARCHAR},
#{record.mchtGroupId,jdbcType=VARCHAR}, #{record.mchtEngNm,jdbcType=VARCHAR}, #{record.mchtEngAddr,jdbcType=VARCHAR},
#{record.mchtEngCityName,jdbcType=VARCHAR}, #{record.saLimitAmt,jdbcType=VARCHAR}, #{record.saAction,jdbcType=VARCHAR},
#{record.psamNum,jdbcType=VARCHAR}, #{record.cdMacNum,jdbcType=VARCHAR}, #{record.posNum,jdbcType=VARCHAR},
#{record.connType,jdbcType=VARCHAR}, #{record.mchtMngMode,jdbcType=VARCHAR}, #{record.mchtFunction,jdbcType=VARCHAR},
#{record.licenceNo,jdbcType=VARCHAR}, #{record.licenceEndDate,jdbcType=VARCHAR}, #{record.bankLicenceNo,jdbcType=VARCHAR},
#{record.busType,jdbcType=VARCHAR}, #{record.faxNo,jdbcType=VARCHAR}, #{record.busAmt,jdbcType=VARCHAR},
#{record.mchtCreLvl,jdbcType=VARCHAR}, #{record.contact,jdbcType=VARCHAR}, #{record.postCode,jdbcType=VARCHAR},
#{record.commEmail,jdbcType=VARCHAR}, #{record.commMobil,jdbcType=VARCHAR}, #{record.commTel,jdbcType=VARCHAR},
#{record.manager,jdbcType=VARCHAR}, #{record.artifCertifTp,jdbcType=VARCHAR}, #{record.identityNo,jdbcType=VARCHAR},
#{record.managerTel,jdbcType=VARCHAR}, #{record.fax,jdbcType=VARCHAR}, #{record.electrofax,jdbcType=VARCHAR},
#{record.regAddr,jdbcType=VARCHAR}, #{record.applyDate,jdbcType=VARCHAR}, #{record.enableDate,jdbcType=VARCHAR},
#{record.preAudNm,jdbcType=VARCHAR}, #{record.confirmNm,jdbcType=VARCHAR}, #{record.protocalId,jdbcType=VARCHAR},
#{record.signInstId,jdbcType=VARCHAR}, #{record.netNm,jdbcType=VARCHAR}, #{record.agrBr,jdbcType=VARCHAR},
#{record.netTel,jdbcType=VARCHAR}, #{record.prolDate,jdbcType=VARCHAR}, #{record.prolTlr,jdbcType=VARCHAR},
#{record.closeDate,jdbcType=VARCHAR}, #{record.closeTlr,jdbcType=VARCHAR}, #{record.mainTlr,jdbcType=VARCHAR},
#{record.checkTlr,jdbcType=VARCHAR}, #{record.operNo,jdbcType=VARCHAR}, #{record.operNm,jdbcType=VARCHAR},
#{record.procFlag,jdbcType=VARCHAR}, #{record.setCur,jdbcType=VARCHAR}, #{record.printInstId,jdbcType=VARCHAR},
#{record.acqInstId,jdbcType=VARCHAR}, #{record.acqBkName,jdbcType=VARCHAR}, #{record.bankNo,jdbcType=VARCHAR},
#{record.orgnNo,jdbcType=VARCHAR}, #{record.subbrhNo,jdbcType=VARCHAR}, #{record.subbrhNm,jdbcType=VARCHAR},
#{record.openTime,jdbcType=VARCHAR}, #{record.closeTime,jdbcType=VARCHAR}, #{record.visActFlg,jdbcType=VARCHAR},
#{record.visMchtId,jdbcType=VARCHAR}, #{record.mstActFlg,jdbcType=VARCHAR}, #{record.mstMchtId,jdbcType=VARCHAR},
#{record.amxActFlg,jdbcType=VARCHAR}, #{record.amxMchtId,jdbcType=VARCHAR}, #{record.dnrActFlg,jdbcType=VARCHAR},
#{record.dnrMchtId,jdbcType=VARCHAR}, #{record.jcbActFlg,jdbcType=VARCHAR}, #{record.jcbMchtId,jdbcType=VARCHAR},
#{record.cupMchtFlg,jdbcType=VARCHAR}, #{record.debMchtFlg,jdbcType=VARCHAR}, #{record.creMchtFlg,jdbcType=VARCHAR},
#{record.cdcMchtFlg,jdbcType=VARCHAR}, #{record.reserved,jdbcType=VARCHAR}, #{record.updOprId,jdbcType=VARCHAR},
#{record.crtOprId,jdbcType=VARCHAR}, #{record.recUpdTs,jdbcType=VARCHAR}, #{record.recCrtTs,jdbcType=VARCHAR},
#{record.lastEtlAcgDt,jdbcType=VARCHAR}, #{record.delF,jdbcType=SMALLINT}
,#{record.tbDate,jdbcType=VARCHAR}
)
</foreach>
</insert>
当我们增大到1000条插入一次的时候,会发现报 SQL ERROR,提示"由于分发协议错误而导致"。
其实根本原因是SQL语句的长度超出了db2缓存的大小:
insertBatch一开始是在拼装SQL,最后才将拼装好的SQL一次性提交入库。
如果数据库缓存的值设置的太小,那么就会造成SQL超长而报错,那么如何增大db2数据库缓存呢。
见下面的方法:
db_name 数据库名称
db_user 数据库用户
db_password 数据库用户的密码
其中的50000是缓冲池的页数,每页4KB,也就是约200M(与下文完整脚本中的说明一致)。我的服务器是8GB内存,可以根据空闲内存适当加大该值。
如果缓存设置成50M,如果表有40个字段,那么可以单次插入800条数据。
如果设置成100M,表有40个字段时,单次可以插入1000到15000条数据,这样大大缓解了频繁连接数据库造成的性能瓶颈。这样单次导入5000万条数据,可以在3个小时内完成。
另外,可以在命令行处理器中执行:
CONNECT TO db_name USER db_user using db_password
然后加大缓冲页的大小:
ALTER BUFFERPOOL IBMDEFAULTBP SIZE -1
UPDATE DB CFG FOR db_name USING BUFFPAGE 50000
完整的文件,应该给过维护组,下面贴出来一次:
--后续语句必需在服务器上使用db_name 用户登录
--执行select * from syscat.bufferpools 当syscat.bufferpools的npages是-1时,才由数据库的配置参数buffpage控制缓冲池的大小。
--先设置syscat.bufferpools.npages为-1(原默认值可能为250),使得设置的参数BUFFPAGE生效,然后修改BUFFPAGE为50000*4K(200M,根据物理内存可适当加大,充分利用闲置内存,但一定要预留一定的内存用于日常业务繁忙时的情况)
--注意,该语句必须要在服务器上执行,在客户端执行可能不成功。在数据库升级程序中运行也可能不成功。最好手工在服务器的命令行处理器中执行。
CONNECT TO db_name USER db_user using db_password
ALTER BUFFERPOOL IBMDEFAULTBP SIZE -1
UPDATE DB CFG FOR db_name USING BUFFPAGE 50000