优化大批量数据写入单个文件时的内存占用问题
问题描述:在生成文件时内存占用率过高
原因:单次查询得到的百万条数据全部存放在一个 List 中,并由单线程处理,内存占用高的原因正是这个 List
测试代码如下:
耗时44007ms
/**
 * Baseline measurement: fetches every row with a single query, renders all of them
 * into one in-memory list of delimited lines, and writes that list to the target
 * file in one call. Prints the elapsed time in milliseconds when done.
 *
 * @throws IOException          if writing the output file fails
 * @throws InterruptedException declared by the signature; not thrown by this body
 */
@Test
public void testBatchWrite() throws IOException, InterruptedException {
    final long startedAt = System.currentTimeMillis();

    final List<Yddz> rows = yddzDao.selectYDDZ();
    final File target = new File("F:\\桌面\\testBatchWrite.txt");
    final String separator = ",";

    // The whole result set is rendered before anything is written.
    final List<String> rendered = new ArrayList<>();
    for (Yddz row : rows) {
        // Column order of one output line; several columns intentionally repeat.
        final String[] columns = {
            row.getBegin_date_s(),
            row.getEnd_date_s(),
            row.getMerchant_name(),
            row.getEnd_date_str(),
            row.getBegin_date_s(),
            row.getBegin_date_s(),
            row.getBegin_date_s(),
            row.getBegin_date_s(),
            row.getBegin_date_s(),
            row.getBegin_date_s(),
            row.getEnd_date_s(),
            row.getMerchant_name(),
            row.getEnd_date_str(),
            row.getBegin_date_s(),
            row.getBegin_date_s(),
            row.getBegin_date_s()
        };

        final StringBuilder text = new StringBuilder();
        for (String column : columns) {
            // Empty or null values become an empty cell; every cell ends with the separator.
            if (!StringUtils.isEmpty(column)) {
                text.append(column);
            }
            text.append(separator);
        }
        rendered.add(text.toString());
    }

    FileUtils.writeLines(target, "UTF-8", rendered, "\n");

    final long elapsed = System.currentTimeMillis() - startedAt;
    System.out.println("完成时间:" + elapsed);
}
以下是用93万条数据做的测试数据
优化方案:使用多线程分批读取数据,每个线程把自己那一批数据写入临时 List 后直接追加到文件,这样可以避免一次性把所有数据放进同一个 List 之后再处理。写文件时可以不加锁,由多个线程以追加方式写入同一个文件,只要保证数据一致即可。
修改后的代码如下:
耗时26310ms
/**
 * Optimized variant: splits the export by product id. One task per product id is
 * handed to {@code batchWriteExecutor}; each task queries only its own rows,
 * renders them into a task-local list and appends that list to the shared output
 * file. The test thread waits on a latch until every task has finished, then
 * prints the elapsed time in milliseconds.
 *
 * @throws InterruptedException if the test thread is interrupted while waiting
 *                              for the tasks to finish
 */
@Test
public void batchWriteTest2() throws InterruptedException {
    final String YKT_SPLIT = ",";
    long start = System.currentTimeMillis();

    // Every task appends, so start from a clean file.
    final File file = new File("F:\\桌面\\testBatchWrite2.txt");
    if (file.exists()) {
        file.delete();
    }

    List<String> productIds = yddzDao.selectProductId();
    final CountDownLatch count = new CountDownLatch(productIds.size());
    for (final String productId : productIds) {
        batchWriteExecutor.execute(new Runnable() {
            @Override
            public void run() {
                try {
                    List<Yddz> batch = yddzDao.selectYDDZByProductId(productId);
                    // Task-local buffer: only this batch is held in memory by this task.
                    List<String> lines = new ArrayList<>();
                    for (Yddz r : batch) {
                        // Column order of one output line; several columns intentionally repeat.
                        String[] columns = {
                            r.getBegin_date_s(),
                            r.getEnd_date_s(),
                            r.getMerchant_name(),
                            r.getEnd_date_str(),
                            r.getBegin_date_s(),
                            r.getBegin_date_s(),
                            r.getBegin_date_s(),
                            r.getBegin_date_s(),
                            r.getBegin_date_s(),
                            r.getBegin_date_s(),
                            r.getEnd_date_s(),
                            r.getMerchant_name(),
                            r.getEnd_date_str(),
                            r.getBegin_date_s(),
                            r.getBegin_date_s(),
                            r.getBegin_date_s()
                        };
                        // StringBuilder is enough here: the builder never leaves this thread.
                        StringBuilder line = new StringBuilder();
                        for (String column : columns) {
                            line.append(StringUtils.isEmpty(column) ? "" : column).append(YKT_SPLIT);
                        }
                        lines.add(line.toString());
                    }
                    // NOTE(review): tasks append to the same file without a lock; output from
                    // different tasks may interleave - confirm this is acceptable for consumers.
                    FileUtils.writeLines(file, "UTF-8", lines, "\n", true);
                } catch (IOException e) {
                    e.printStackTrace();
                } finally {
                    // Exactly one count-down per task, on success and on any failure.
                    // Counting twice on IOException would release await() early; not counting
                    // after an unchecked exception would block await() forever.
                    count.countDown();
                }
            }
        });
    }
    count.await();
    System.out.println("完成时间:" + (System.currentTimeMillis() - start) + "ms");
}