Writing Large Files with Java NIO: A Comparison (Win7 vs. Mac)

Test setup

Write a 2 GB file in batches, 128 MB per batch.

Run the test on both a Win7 machine (3 GB RAM, dual-core, 32-bit, T-series processor) and a macOS machine (8 GB RAM, quad-core, 64-bit, i7 processor). In theory the results also depend on the disk type and configuration, which are not listed here.

Test code

package rwbigfile;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.channels.ReadableByteChannel;
import java.security.AccessController;
import java.security.PrivilegedAction;

import util.StopWatch;

/**
 * Comparison of NIO techniques for writing large files.
 * @author Will
 */
public class WriteBigFileComparison {

    // data chunk written per batch
    private static final int DATA_CHUNK = 128 * 1024 * 1024;

    // total data size is 2 GB
    private static final long LEN = 2L * 1024 * 1024 * 1024L;

    public static void writeWithFileChannel() throws IOException {
        File file = new File("e:/test/fc.dat");
        if (file.exists()) {
            file.delete();
        }
        RandomAccessFile raf = new RandomAccessFile(file, "rw");
        FileChannel fileChannel = raf.getChannel();
        byte[] data = null;
        long len = LEN;
        ByteBuffer buf = ByteBuffer.allocate(DATA_CHUNK);
        int dataChunk = DATA_CHUNK / (1024 * 1024);
        while (len >= DATA_CHUNK) {
            System.out.println("write a data chunk: " + dataChunk + "MB");
            buf.clear(); // reset to writing mode for the next chunk
            data = new byte[DATA_CHUNK];
            buf.put(data);
            data = null;
            buf.flip(); // switch the buffer from writing mode to reading mode
            fileChannel.write(buf);
            fileChannel.force(true);
            len -= DATA_CHUNK;
        }
        if (len > 0) {
            System.out.println("write rest data chunk: " + len + "B");
            buf = ByteBuffer.allocateDirect((int) len);
            data = new byte[(int) len];
            buf.put(data);
            buf.flip(); // position back to 0, limit unchanged
            fileChannel.write(buf);
            fileChannel.force(true);
            data = null;
        }
        fileChannel.close();
        raf.close();
    }

    /**
     * Write a big file with MappedByteBuffer.
     * @throws IOException
     */
    public static void writeWithMappedByteBuffer() throws IOException {
        File file = new File("e:/test/mb.dat");
        if (file.exists()) {
            file.delete();
        }
        RandomAccessFile raf = new RandomAccessFile(file, "rw");
        FileChannel fileChannel = raf.getChannel();
        long pos = 0; // long, not int: the map() position can exceed Integer.MAX_VALUE
        MappedByteBuffer mbb = null;
        byte[] data = null;
        long len = LEN;
        int dataChunk = DATA_CHUNK / (1024 * 1024);
        while (len >= DATA_CHUNK) {
            System.out.println("write a data chunk: " + dataChunk + "MB");
            mbb = fileChannel.map(MapMode.READ_WRITE, pos, DATA_CHUNK);
            data = new byte[DATA_CHUNK];
            mbb.put(data);
            data = null;
            len -= DATA_CHUNK;
            pos += DATA_CHUNK;
        }
        if (len > 0) {
            System.out.println("write rest data chunk: " + len + "B");
            mbb = fileChannel.map(MapMode.READ_WRITE, pos, len);
            data = new byte[(int) len];
            mbb.put(data);
        }
        data = null;
        unmap(mbb); // release the MappedByteBuffer
        fileChannel.close();
    }

    public static void writeWithTransferTo() throws IOException {
        File file = new File("e:/test/transfer.dat");
        if (file.exists()) {
            file.delete();
        }
        RandomAccessFile raf = new RandomAccessFile(file, "rw");
        FileChannel toFileChannel = raf.getChannel();
        long len = LEN;
        byte[] data = null;
        ByteArrayInputStream bais = null;
        ReadableByteChannel fromByteChannel = null;
        long position = 0;
        int dataChunk = DATA_CHUNK / (1024 * 1024);
        while (len >= DATA_CHUNK) {
            System.out.println("write a data chunk: " + dataChunk + "MB");
            data = new byte[DATA_CHUNK];
            bais = new ByteArrayInputStream(data);
            fromByteChannel = Channels.newChannel(bais);
            long count = DATA_CHUNK;
            toFileChannel.transferFrom(fromByteChannel, position, count);
            data = null;
            position += DATA_CHUNK;
            len -= DATA_CHUNK;
        }
        if (len > 0) {
            System.out.println("write rest data chunk: " + len + "B");
            data = new byte[(int) len];
            bais = new ByteArrayInputStream(data);
            fromByteChannel = Channels.newChannel(bais);
            long count = len;
            toFileChannel.transferFrom(fromByteChannel, position, count);
        }
        data = null;
        toFileChannel.close();
        fromByteChannel.close();
    }

    /**
     * Reading from a MappedByteBuffer after it has been released crashes the JVM.
     * This is easy to trigger under concurrency: one thread starts reading while
     * another is releasing the buffer. For stability, check that no thread is
     * still reading or writing before releasing.
     * @param mappedByteBuffer
     */
    public static void unmap(final MappedByteBuffer mappedByteBuffer) {
        try {
            if (mappedByteBuffer == null) {
                return;
            }
            mappedByteBuffer.force();
            AccessController.doPrivileged(new PrivilegedAction<Object>() {
                @Override
                @SuppressWarnings("restriction")
                public Object run() {
                    try {
                        Method getCleanerMethod = mappedByteBuffer.getClass()
                                .getMethod("cleaner", new Class[0]);
                        getCleanerMethod.setAccessible(true);
                        sun.misc.Cleaner cleaner =
                                (sun.misc.Cleaner) getCleanerMethod
                                        .invoke(mappedByteBuffer, new Object[0]);
                        cleaner.clean();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                    System.out.println("clean MappedByteBuffer completed");
                    return null;
                }
            });
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException {
        StopWatch sw = new StopWatch();

        sw.startWithTaskName("write with file channel's write(ByteBuffer)");
        writeWithFileChannel();
        sw.stopAndPrint();

        sw.startWithTaskName("write with file channel's transferTo");
        writeWithTransferTo();
        sw.stopAndPrint();

        sw.startWithTaskName("write with MappedByteBuffer");
        writeWithMappedByteBuffer();
        sw.stopAndPrint();
    }
}
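
The listing imports util.StopWatch, which the article does not show. A minimal stand-in that matches the calls used in main() (startWithTaskName and stopAndPrint) might look like the sketch below; this is an assumed implementation, not the author's original class:

package util;

// Minimal assumed stand-in for the StopWatch used by the test class above.
public class StopWatch {

    private String taskName;
    private long startMillis;

    // remember the task name and record the start time
    public void startWithTaskName(String taskName) {
        this.taskName = taskName;
        this.startMillis = System.currentTimeMillis();
    }

    // print the elapsed time for the current task in seconds
    public void stopAndPrint() {
        long elapsed = System.currentTimeMillis() - startMillis;
        System.out.println(taskName + " took " + (elapsed / 1000.0) + "s");
    }
}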

Test results (Y axis: elapsed time in seconds)

[Figure: elapsed-time bar chart for the three write methods on Win7 and macOS]

writeWithMappedByteBuffer is clearly the fastest, and its lead grows as the hardware gets better.

On the lower-end hardware, writeWithTransferTo is slightly faster than writeWithFileChannel.

On the higher-end hardware, writeWithTransferTo and writeWithFileChannel perform roughly the same.

Also note that besides JVM heap memory, writeWithMappedByteBuffer consumes additional native memory (direct byte buffer memory).

Tips for using memory-mapped files

MappedByteBuffer occupies "double" memory: JVM heap for the object plus direct byte buffer native memory. The maximum size of the latter can be capped with -XX:MaxDirectMemorySize (e.g. -XX:MaxDirectMemorySize=256m).

Do not call MappedByteBuffer's force() frequently. It forces the OS to flush in-memory data to disk immediately, leaving only a marginal performance gain over plain I/O. Instead, flush on a timer and on a dirty-data threshold, as the code example later in this article does.

If power is lost or the server suddenly goes down, data in a memory-mapped file may not have reached disk yet and will be lost. To reduce this risk, avoid writing huge files with MappedByteBuffer; split a big file into several smaller ones, but not too small, or the performance advantage disappears.

ByteBuffer's rewind() resets position to 0 so the buffer's data can be read again; limit stays unchanged, so the number of readable bytes stays the same.

ByteBuffer's flip() switches a buffer from writing mode to reading mode.

ByteBuffer's clear() and compact() both switch the buffer back to writing mode after its data has been read, but they differ. clear() sets position to 0 and limit to capacity; in other words the buffer is reset, although the bytes in it are not erased. Any data not yet read is "forgotten", and subsequent writes will overwrite it. compact() preserves unread data by copying it to the front of the buffer, then sets position just past the copied data and limit to capacity, so the unread bytes are not overwritten. A minimal sketch of these mode switches follows below.
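
The following self-contained sketch walks through flip(), rewind(), compact(), and clear(); the buffer size and byte values are arbitrary, chosen only for illustration:

import java.nio.ByteBuffer;

public class BufferModeDemo {

    public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(8);        // writing mode: position=0, limit=8
        buf.put((byte) 1).put((byte) 2).put((byte) 3);  // position=3
        buf.flip();                                     // reading mode: position=0, limit=3
        System.out.println(buf.get());                  // reads 1, position=1
        buf.rewind();                                   // position back to 0, limit still 3
        System.out.println(buf.get());                  // reads 1 again
        buf.get();                                      // reads 2, position=2; byte 3 still unread
        buf.compact();                                  // unread byte 3 copied to index 0;
                                                        //   position=1, limit=8 (writing mode)
        buf.put((byte) 4);                              // written after the preserved byte
        buf.clear();                                    // position=0, limit=8; bytes not erased,
                                                        //   but the buffer is treated as empty
    }
}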

Flushing a memory-mapped file to disk on a timer and on a size threshold

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;

public class MappedFile {

    // file name
    private String fileName;

    // directory the file lives in
    private String fileDirPath;

    // file object
    private File file;

    private MappedByteBuffer mappedByteBuffer;

    private FileChannel fileChannel;

    private boolean boundSuccess = false;

    // maximum file size is 50 MB
    private final static long MAX_FILE_SIZE = 1024 * 1024 * 50;

    // once 512 KB of dirty data accumulates, a forced flush must be triggered
    private long MAX_FLUSH_DATA_SIZE = 1024 * 512;

    // maximum flush interval (ms); once exceeded, a forced flush must be triggered
    private long MAX_FLUSH_TIME_GAP = 1000;

    // current write position in the file
    private long writePosition = 0;

    // time of the last flush
    private long lastFlushTime;

    // file position of the last flush
    private long lastFlushFilePosition = 0;

    public MappedFile(String fileName, String fileDirPath) {
        super();
        this.fileName = fileName;
        this.fileDirPath = fileDirPath;
        this.file = new File(fileDirPath + "/" + fileName);
        if (!file.exists()) {
            try {
                file.createNewFile();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Bind the file channel to a memory-mapped buffer.
     * @return
     */
    public synchronized boolean boundChannelToByteBuffer() {
        try {
            RandomAccessFile raf = new RandomAccessFile(file, "rw");
            this.fileChannel = raf.getChannel();
        } catch (Exception e) {
            e.printStackTrace();
            this.boundSuccess = false;
            return false;
        }
        try {
            this.mappedByteBuffer = this.fileChannel
                    .map(FileChannel.MapMode.READ_WRITE, 0, MAX_FILE_SIZE);
        } catch (IOException e) {
            e.printStackTrace();
            this.boundSuccess = false;
            return false;
        }
        this.boundSuccess = true;
        return true;
    }

    /**
     * Write data: delete the previous file, then rewrite it
     * (left unimplemented; always returns false).
     * @param data
     * @return
     */
    public synchronized boolean writeData(byte[] data) {
        return false;
    }

    /**
     * Append data at the end of the file.
     * @param data
     * @return
     * @throws Exception
     */
    public synchronized boolean appendData(byte[] data) throws Exception {
        if (!boundSuccess) {
            boundChannelToByteBuffer();
        }
        writePosition = writePosition + data.length;
        if (writePosition >= MAX_FILE_SIZE) { // writing data would exceed the size limit: do not write
            flush();
            writePosition = writePosition - data.length;
            System.out.println("File="
                    + file.toURI().toString()
                    + " is written full.");
            System.out.println("already write data length:"
                    + writePosition
                    + ", max file size=" + MAX_FILE_SIZE);
            return false;
        }
        this.mappedByteBuffer.put(data);
        // check whether the in-memory buffer needs to be flushed to disk
        if ((writePosition - lastFlushFilePosition > this.MAX_FLUSH_DATA_SIZE)
                || (System.currentTimeMillis() - lastFlushTime > this.MAX_FLUSH_TIME_GAP
                        && writePosition > lastFlushFilePosition)) {
            flush(); // flush to disk
        }
        return true;
    }

    public synchronized void flush() {
        this.mappedByteBuffer.force();
        this.lastFlushTime = System.currentTimeMillis();
        this.lastFlushFilePosition = writePosition;
    }

    public long getLastFlushTime() {
        return lastFlushTime;
    }

    public String getFileName() {
        return fileName;
    }

    public String getFileDirPath() {
        return fileDirPath;
    }

    public boolean isBoundSuccess() {
        return boundSuccess;
    }

    public File getFile() {
        return file;
    }

    public static long getMaxFileSize() {
        return MAX_FILE_SIZE;
    }

    public long getWritePosition() {
        return writePosition;
    }

    public long getLastFlushFilePosition() {
        return lastFlushFilePosition;
    }

    public long getMAX_FLUSH_DATA_SIZE() {
        return MAX_FLUSH_DATA_SIZE;
    }

    public long getMAX_FLUSH_TIME_GAP() {
        return MAX_FLUSH_TIME_GAP;
    }
}
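
A minimal usage sketch of this class (the file name and directory below are placeholder values, not from the original article):

public class MappedFileDemo {

    public static void main(String[] args) throws Exception {
        // Append 1 KB records until the 50 MB mapped file is full;
        // flushes happen automatically inside appendData() via the
        // 512 KB dirty-data threshold and the 1 s time gap.
        MappedFile mf = new MappedFile("demo.dat", "/tmp");
        byte[] record = new byte[1024];
        while (mf.appendData(record)) {
            // keep appending
        }
        mf.flush(); // final flush of any remaining dirty data
    }
}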

That's all for this article. I hope it helps with your study, and please keep supporting 脚本之家.
