Java 读取大文件的几种方式
/**
 * Demonstrates several ways of reading (copying) a large file: classic character streams,
 * NIO channels with a heap buffer, {@code RandomAccessFile}, and memory-mapped buffers.
 *
 * <p>The no-argument methods keep the original hard-coded demo paths and buffer sizes; each has
 * an overload that takes the paths (and buffer size) explicitly.
 */
public class MethodTest {

    /** Input file used by the no-argument demo methods. */
    private static final String SOURCE_FILE = "G://lily_947.txt";

    /** Number of slices the source file is divided into by the memory-mapped copy. */
    private static final int MAPPED_SLICES = 30;

    /**
     * Old IO: copies the demo file with {@code BufferedReader} / {@code PrintWriter}.
     *
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    public static void oldIOReadFile() throws IOException {
        oldIOReadFile(SOURCE_FILE, "G://oldIO.tmp", 100 * 1024 * 1024);
    }

    /**
     * Old IO: copies {@code srcPath} to {@code destPath} as text, using the platform default
     * charset for both decoding and encoding (so this is only a faithful copy for text files).
     *
     * @param srcPath     file to read
     * @param destPath    file to create or overwrite; must not be the same file as {@code srcPath}
     * @param bufferChars size of the transfer buffer in chars, must be positive
     * @throws IOException if reading or writing fails, or both paths denote the same file
     */
    public static void oldIOReadFile(String srcPath, String destPath, int bufferChars) throws IOException {
        requirePositive(bufferChars, "bufferChars");
        requireDifferentFiles(srcPath, destPath);
        try (BufferedReader br = new BufferedReader(new FileReader(srcPath));
             PrintWriter pw = new PrintWriter(destPath)) {
            char[] chunk = new char[bufferChars];
            int count;
            while ((count = br.read(chunk)) != -1) {
                // Write only what was actually read; printing the whole array would append
                // stale characters from a previous iteration (or zeros) after a short read.
                pw.write(chunk, 0, count);
            }
            // PrintWriter never throws on write errors, so the error flag has to be polled.
            if (pw.checkError()) {
                throw new IOException("failed to write " + destPath);
            }
        }
    }

    /**
     * New IO: copies the demo file through a {@code FileChannel} and a heap {@code ByteBuffer}.
     *
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    public static void newIOReadFile() throws IOException {
        newIOReadFile(SOURCE_FILE, "G://newIO.tmp", 200 * 1024 * 1024);
    }

    /**
     * New IO: copies {@code srcPath} to {@code destPath} through file channels.
     *
     * @param srcPath     file to read
     * @param destPath    file to create or overwrite; must not be the same file as {@code srcPath}
     * @param bufferBytes size of the transfer buffer in bytes, must be positive
     * @throws IOException if reading or writing fails, or both paths denote the same file
     */
    public static void newIOReadFile(String srcPath, String destPath, int bufferBytes) throws IOException {
        requirePositive(bufferBytes, "bufferBytes");
        requireDifferentFiles(srcPath, destPath);
        // Closing a channel also closes the RandomAccessFile it was obtained from.
        try (FileChannel reader = new RandomAccessFile(srcPath, "r").getChannel();
             FileChannel writer = new RandomAccessFile(destPath, "rw").getChannel()) {
            // "rw" does not truncate: drop old content so a longer previous file leaves no tail.
            writer.truncate(0);
            ByteBuffer bb = ByteBuffer.allocate(bufferBytes);
            while (reader.read(bb) != -1) {
                bb.flip();
                // A single write() is allowed to consume only part of the buffer.
                while (bb.hasRemaining()) {
                    writer.write(bb);
                }
                bb.clear();
            }
        }
    }

    /**
     * Copies the demo file with two {@code RandomAccessFile}s and a byte array.
     *
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    public static void randomReadFile() throws IOException {
        randomReadFile(SOURCE_FILE, "G://random.tmp", 200 * 1024 * 1024);
    }

    /**
     * Copies {@code srcPath} to {@code destPath} with two {@code RandomAccessFile}s.
     *
     * @param srcPath     file to read
     * @param destPath    file to create or overwrite; must not be the same file as {@code srcPath}
     * @param bufferBytes size of the transfer buffer in bytes, must be positive
     * @throws IOException if reading or writing fails, or both paths denote the same file
     */
    public static void randomReadFile(String srcPath, String destPath, int bufferBytes) throws IOException {
        requirePositive(bufferBytes, "bufferBytes");
        requireDifferentFiles(srcPath, destPath);
        try (RandomAccessFile reader = new RandomAccessFile(srcPath, "r");
             RandomAccessFile writer = new RandomAccessFile(destPath, "rw")) {
            // "rw" does not truncate: drop old content so a longer previous file leaves no tail.
            writer.setLength(0);
            byte[] chunk = new byte[bufferBytes];
            int count;
            while ((count = reader.read(chunk)) != -1) {
                // Only the first `count` bytes are valid; the rest of the array is stale.
                writer.write(chunk, 0, count);
            }
        }
    }

    /**
     * Memory-mapped copy of the demo file.
     *
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    public static void mappedBuffer() throws IOException {
        mappedBuffer(SOURCE_FILE, "G://buffer.tmp");
    }

    /**
     * Copies {@code srcPath} to {@code destPath} by mapping both files slice by slice. The source
     * is divided into roughly 30 slices (each at least 1 byte and at most
     * {@code Integer.MAX_VALUE} bytes, the limit of a single mapping).
     *
     * <p>The mappings are not released explicitly; see {@link #clean(MappedByteBuffer)}.
     *
     * @param srcPath  file to read
     * @param destPath file to create or overwrite; must not be the same file as {@code srcPath}
     * @throws IOException if mapping fails, or both paths denote the same file
     */
    public static void mappedBuffer(String srcPath, String destPath) throws IOException {
        requireDifferentFiles(srcPath, destPath);
        try (FileChannel reader = new FileInputStream(srcPath).getChannel();
             FileChannel writer = new RandomAccessFile(destPath, "rw").getChannel()) {
            long total = reader.size();
            // Remove anything beyond the new length; READ_WRITE mappings grow the file as needed.
            writer.truncate(total);
            // Clamp to >= 1 so files smaller than 30 bytes still make progress.
            long slice = Math.min(Integer.MAX_VALUE, Math.max(1L, total / MAPPED_SLICES));
            for (long pos = 0; pos < total; pos += slice) {
                long len = Math.min(slice, total - pos);
                MappedByteBuffer from = reader.map(FileChannel.MapMode.READ_ONLY, pos, len);
                // Every slice, including the final shorter one, gets its own target mapping at
                // the same offset; reusing the previous mapping would overwrite earlier data.
                MappedByteBuffer to = writer.map(FileChannel.MapMode.READ_WRITE, pos, len);
                to.put(from);
            }
        }
    }

    /**
     * Moves a file: copies {@code srcpath/filename} to {@code destpath/filename} and deletes the
     * source once the copy has completed.
     *
     * <p>Side note on {@code MappedByteBuffer}: it is obtained from {@code FileChannel.map} and has
     * no public {@code unmap} method, and a single mapping cannot exceed 2 GB. A mapping-based copy
     * therefore cannot handle larger files and keeps the source mapped while it is being deleted.
     * This implementation uses {@code FileChannel.transferTo} instead, which has neither problem.
     *
     * <p>If source and destination denote the same file, nothing is done.
     *
     * @param filename name of the file inside both directories
     * @param srcpath  directory containing the file
     * @param destpath directory to move the file into
     * @throws IOException if the copy fails or the source cannot be deleted afterwards
     */
    public void copyFile(String filename, String srcpath, String destpath) throws IOException {
        File source = new File(srcpath + "/" + filename);
        File dest = new File(destpath + "/" + filename);
        if (source.getCanonicalFile().equals(dest.getCanonicalFile())) {
            // Opening the destination would truncate the very file we are about to read.
            return;
        }
        try (FileChannel in = new FileInputStream(source).getChannel();
             FileChannel out = new FileOutputStream(dest).getChannel()) {
            long size = in.size();
            long pos = 0;
            while (pos < size) {
                // transferTo may move fewer bytes than requested, so loop until done.
                long moved = in.transferTo(pos, size - pos, out);
                if (moved <= 0) {
                    // Never delete the source after an incomplete copy.
                    throw new IOException("copy of " + source + " stopped after " + pos
                            + " of " + size + " bytes");
                }
                pos += moved;
            }
        }
        // Delete the source only after both channels are closed and the copy succeeded.
        if (!source.delete()) {
            throw new IOException("copied " + source + " to " + dest
                    + " but could not delete the source");
        }
    }

    /**
     * Best-effort release of the native memory behind a direct (or mapped) buffer.
     *
     * <p>Background: a file cannot be reliably deleted while it is still mapped, and Java offers no
     * public {@code unmap} for {@code MappedByteBuffer}. The historical workaround is the internal
     * {@code cleaner().clean()} call on the direct buffer implementation, but that class is hidden
     * from Java 9 on and not every JVM vendor ships it. Calling {@code System.gc()} and hoping the
     * mapping is collected in time is the other known workaround; explicit GC is strongly
     * discouraged and often disabled in production, so it is not used here.
     *
     * <p>This method first tries {@code sun.misc.Unsafe.invokeCleaner} (the Java 9+ route) and
     * falls back to the reflective {@code cleaner().clean()} call. Everything is done through
     * reflection so the class has no compile-time dependency on internal JDK types. Failures are
     * printed and otherwise ignored. {@code null}, non-buffer and non-direct arguments are ignored.
     *
     * <p>The buffer must not be used again after this call.
     *
     * @param buffer the direct buffer to release
     * @throws Exception declared for compatibility; failures are reported on stderr instead
     */
    public static void clean(final Object buffer) throws Exception {
        if (!(buffer instanceof ByteBuffer) || !((ByteBuffer) buffer).isDirect()) {
            return;
        }
        releaseDirectBuffer((ByteBuffer) buffer);
    }

    /**
     * Same as {@link #clean(Object)}, but first forces pending changes of the mapping to be
     * written to the file.
     *
     * @param buffer the mapping to flush and release; {@code null} is ignored
     * @throws Exception declared for compatibility; release failures are reported on stderr
     */
    public static void clean(final MappedByteBuffer buffer) throws Exception {
        if (buffer == null) {
            return;
        }
        buffer.force();
        releaseDirectBuffer(buffer);
    }

    /** Runs the reflective release inside a privileged action, as the original code did. */
    @SuppressWarnings("removal")
    private static void releaseDirectBuffer(final ByteBuffer buffer) {
        AccessController.doPrivileged(new PrivilegedAction<Void>() {
            @Override
            public Void run() {
                try {
                    if (!cleanViaUnsafe(buffer)) {
                        cleanViaCleanerMethod(buffer);
                    }
                } catch (Exception e) {
                    // Releasing early is only an optimisation; report and carry on.
                    e.printStackTrace();
                }
                return null;
            }
        });
    }

    /**
     * Releases the buffer through {@code sun.misc.Unsafe.invokeCleaner(ByteBuffer)}.
     *
     * @return {@code false} if this runtime does not offer that method
     */
    private static boolean cleanViaUnsafe(ByteBuffer buffer) throws Exception {
        Class<?> unsafeClass;
        Method invokeCleaner;
        try {
            unsafeClass = Class.forName("sun.misc.Unsafe");
            invokeCleaner = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
        } catch (ClassNotFoundException | NoSuchMethodException e) {
            return false;
        }
        java.lang.reflect.Field theUnsafe = unsafeClass.getDeclaredField("theUnsafe");
        theUnsafe.setAccessible(true);
        invokeCleaner.invoke(theUnsafe.get(null), buffer);
        return true;
    }

    /** Releases the buffer through the legacy reflective {@code buffer.cleaner().clean()} call. */
    private static void cleanViaCleanerMethod(ByteBuffer buffer) throws Exception {
        Method cleanerMethod = buffer.getClass().getMethod("cleaner");
        cleanerMethod.setAccessible(true);
        Object cleaner = cleanerMethod.invoke(buffer);
        if (cleaner == null) {
            return;
        }
        Method cleanMethod = cleaner.getClass().getMethod("clean");
        cleanMethod.setAccessible(true);
        cleanMethod.invoke(cleaner);
    }

    /** Rejects buffer sizes that would make the copy loops spin without progress. */
    private static void requirePositive(int value, String name) {
        if (value <= 0) {
            throw new IllegalArgumentException(name + " must be positive: " + value);
        }
    }

    /** Rejects copying a file onto itself, which would truncate the data before it is read. */
    private static void requireDifferentFiles(String srcPath, String destPath) throws IOException {
        if (new File(srcPath).getCanonicalFile().equals(new File(destPath).getCanonicalFile())) {
            throw new IOException("source and destination are the same file: " + srcPath);
        }
    }
}
最后一种（内存映射 MappedByteBuffer）速度比较快，但有不少问题需要注意：例如没有公开的 unmap 方法，单次映射也不能超过 2GB。