最近要做一个数据采集分析系统,每五分钟采集约 6GB 数据,采集数据的每一行是一条完整记录,需要对每条记录做 split 处理。为了找到最优的 split 方法,测试了 JDK(String.split)、Apache Commons Lang(StringUtils.split)和 Guava(Splitter)三种字符串切分方式的效率。
测试读取 250MB 的文件,共 100 万行数据,测试数据对比:
[img]http://dl2.iteye.com/upload/attachment/0088/8015/b9590c84-2fb0-3f2a-84fb-b352c6d3ed1c.png[/img]
测试读取 250MB 的文件,共 100 万行数据,测试数据对比:
[img]http://dl2.iteye.com/upload/attachment/0088/8015/b9590c84-2fb0-3f2a-84fb-b352c6d3ed1c.png[/img]
/**
 * Benchmark comparing three ways of splitting a {@code '|'}-delimited line:
 * JDK {@link String#split(String)}, Commons Lang {@code StringUtils.split}
 * and Guava {@code Splitter}.
 *
 * <p>Each test streams a data file line by line and splits every line; the
 * split result itself is discarded, only the number of lines is counted.
 * Timing is presumably collected by the {@code BenchmarkRule} JUnit rule
 * (its behavior is not visible in this file).
 *
 * <p>NOTE(review): the three strategies are not strictly equivalent. The
 * Guava splitter is configured with {@code omitEmptyStrings()}, whereas
 * {@code String.split} keeps interior empty fields; confirm this is the
 * comparison you intend.
 */
@AxisRange(min = 0)
@BenchmarkMethodChart(filePrefix = "split-benchmark-barchart")
public class SplitTest {
    @Rule
    public BenchmarkRule benchmarkRun = new BenchmarkRule();

    /** Shared Guava splitter on {@code '|'} that drops empty fields. */
    private static final Splitter splitter = Splitter.on('|').omitEmptyStrings();

    /**
     * Splits every line of the input file with {@link String#split(String)}.
     *
     * @throws IOException if the input file cannot be read
     */
    @Test
    public void jdkSplitTest() throws IOException {
        // NOTE(review): this file differs from the one read by langSplitTest and
        // guavaSplitTest; for a fair comparison all three should read the same input.
        File file = new File("G:/huawu/PS_FileInnerMon1_20130723170104-14076.dat");
        Files.readLines(file, Charsets.UTF_8, new LineProcessor<Integer>() {
            int count;

            @Override
            public boolean processLine(String line) throws IOException {
                count++;
                // String.split takes a regular expression. An unescaped "|" is an
                // alternation of two empty patterns and matches between every
                // character, so the pipe must be escaped to split on the literal '|'.
                line.split("\\|");
                return true;
            }

            @Override
            public Integer getResult() {
                return count;
            }
        });
    }

    /**
     * Splits every line of the input file with Commons Lang
     * {@code StringUtils.split(String, char)}.
     *
     * @throws IOException if the input file cannot be read
     */
    @Test
    public void langSplitTest() throws IOException {
        File file = new File("G:/huawu/PS_FileInnerMon1_20130723170003-9836.dat");
        Files.readLines(file, Charsets.UTF_8, new LineProcessor<Integer>() {
            int count;

            @Override
            public boolean processLine(String line) throws IOException {
                count++;
                StringUtils.split(line, '|');
                return true;
            }

            @Override
            public Integer getResult() {
                return count;
            }
        });
    }

    /**
     * Splits every line of the input file with the shared Guava {@code Splitter}.
     *
     * @throws IOException if the input file cannot be read
     */
    @Test
    public void guavaSplitTest() throws IOException {
        File file = new File("G:/huawu/PS_FileInnerMon1_20130723170003-9836.dat");
        Files.readLines(file, Charsets.UTF_8, new LineProcessor<Integer>() {
            int count;
            // Number of tokens consumed; exists only so the iteration below has an effect.
            int tokens;

            @Override
            public boolean processLine(String line) throws IOException {
                count++;
                // Splitter.split returns a lazily evaluated Iterable: no splitting
                // happens until it is iterated. Walk the tokens so this test does the
                // same kind of work as the eager JDK and Commons Lang variants.
                for (String token : splitter.split(line)) {
                    tokens++;
                }
                return true;
            }

            @Override
            public Integer getResult() {
                return count;
            }
        });
    }
}