下面代码使用两种方式读取日志文件,一种是流方式,一种是内存映射:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;
/**
 * Micro-benchmark that reads a log file line by line in two ways and prints
 * the elapsed wall-clock time of each: a buffered character stream versus a
 * memory-mapped file that is decoded in one go and tokenized with a
 * {@link Scanner}.
 */
public class Test {

    /** Log file read by the no-argument {@link #stream()} and {@link #mem()} runs. */
    static String path = "D:\\log\\proclog\\loganalysis\\0108161331_CHN-MY-1_1021235501.log";

    /**
     * Runs the stream benchmark followed by the memory-mapped benchmark on {@link #path}.
     *
     * @param s command-line arguments (unused)
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] s) throws IOException {
        stream();
        mem();
    }

    /**
     * Times a line-by-line read of {@link #path} through a {@link BufferedReader}
     * and prints the elapsed milliseconds.
     *
     * @throws FileNotFoundException if the file does not exist
     * @throws IOException if reading fails
     */
    public static void stream() throws FileNotFoundException, IOException {
        long startTime = System.currentTimeMillis();
        stream(path);
        long estimatedTime = System.currentTimeMillis() - startTime;
        System.out.printf("stream Diff: %d ms\n", estimatedTime);
    }

    /**
     * Reads every line of the given file through {@link #getReader(File)},
     * discarding the content, and closes the reader afterwards.
     *
     * @param file path of the file to read
     * @return the number of lines read
     * @throws IOException if the file cannot be opened or read
     */
    public static long stream(String file) throws IOException {
        try (BufferedReader reader = getReader(new File(file))) {
            long lines = 0;
            // Idle loop: the lines are only counted, never processed.
            while (reader.readLine() != null) {
                lines++;
            }
            return lines;
        }
    }

    /**
     * Opens a buffered reader on a file, transparently decompressing it when
     * the file name ends with {@code .gz}. The caller owns the returned reader
     * and must close it.
     *
     * <p>NOTE(review): bytes are decoded with the platform default charset,
     * whereas {@link #mem()} uses ISO-8859-1 — the two benchmarks are only
     * comparable when those agree.
     *
     * @param f the file to open
     * @return a reader positioned at the start of the (decompressed) content
     * @throws FileNotFoundException if the file does not exist
     * @throws IOException if the gzip header cannot be read
     */
    public static BufferedReader getReader(File f) throws FileNotFoundException, IOException {
        FileInputStream raw = new FileInputStream(f);
        boolean opened = false;
        try {
            BufferedReader reader;
            if (f.getName().endsWith(".gz")) {
                // The GZIPInputStream constructor reads the header and may throw.
                reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(raw)));
            } else {
                reader = new BufferedReader(new InputStreamReader(raw));
            }
            opened = true;
            return reader;
        } finally {
            // Do not leak the file handle when wrapping the stream failed.
            if (!opened) {
                raw.close();
            }
        }
    }

    /**
     * Times a read of {@link #path} through a memory-mapped buffer and prints
     * the elapsed milliseconds.
     *
     * @throws IOException if the file cannot be opened, mapped or decoded
     */
    public static void mem() throws IOException {
        long startTime = System.currentTimeMillis();
        mem(path);
        long estimatedTime = System.currentTimeMillis() - startTime;
        System.out.printf("mem Diff: %d ms", estimatedTime);
    }

    /**
     * Maps the whole file into memory, decodes it as ISO-8859-1 and splits it
     * on the platform line separator, discarding the tokens.
     *
     * <p>The complete file is decoded into a single heap {@link CharBuffer}
     * (two bytes per character), so heap usage grows with the file size and
     * large files can exhaust the heap.
     *
     * @param file path of the file to read
     * @return the number of tokens produced by the scanner
     * @throws IOException if the file cannot be opened, mapped or decoded
     */
    public static long mem(String file) throws IOException {
        try (FileInputStream in = new FileInputStream(file);
             FileChannel fc = in.getChannel()) {
            MappedByteBuffer byteBuffer = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
            Charset charset = Charset.forName("iso-8859-1");
            CharsetDecoder decoder = charset.newDecoder();
            CharBuffer charBuffer = decoder.decode(byteBuffer);
            // The separator string is interpreted by Scanner as a regex pattern.
            try (Scanner sc = new Scanner(charBuffer).useDelimiter(System.getProperty("line.separator"))) {
                long tokens = 0;
                while (sc.hasNext()) {
                    sc.next();
                    tokens++;
                }
                return tokens;
            }
        }
    }
}
输出:
stream Diff: 147 ms
mem Diff: 2470 ms
从输出来看,流方式要远远快于内存映射读取,看来逐行读取文本还是继续使用 stream API 吧。
PS. 测试文件大小23MB,使用一个100MB的文件,mem方式报内存溢出,有点尴尬,先做个记号吧。
参考:
http://hi.baidu.com/limin040206/blog/item/92763dfcd301ff0008244d48.html
http://jiangzhengjun.iteye.com/blog/515745
http://stackoverflow.com/questions/1045632/bufferedreader-for-large-bytebuffer
-- end --