Java 操作大文件的 3 种方法对比

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
 
/**
 * Counts how often each word occurs in a text file and prints the result to
 * standard output, one {@code word count} pair per line.
 *
 * <p>The file is read chunk by chunk through memory-mapped regions, so it is
 * never loaded into memory as a whole. Words are separated by any of
 * {@code , . ; ! ?}, a space, or a line terminator, and are compared
 * case-insensitively. Bytes are decoded as UTF-8.
 */
public class CountWordsOfArticle {
	/**
	 * Counts the words of {@code fileName} and prints them, most frequent
	 * first (ties are ordered alphabetically so the output is deterministic).
	 *
	 * <p>If the file does not exist a message is printed and the method
	 * returns without counting anything.
	 *
	 * @param fileName  path of the text file to analyse
	 * @param arraySize number of bytes fetched per chunk; must be positive
	 * @throws IOException              if the file cannot be opened or mapped
	 * @throws IllegalArgumentException if {@code arraySize} is not positive
	 */
	public void countWordsOfArticle(String fileName, int arraySize) throws IOException {
		File file = new File(fileName);
		if (!file.exists()) {
			System.out.println("该文件不存在");
			return;
		}
		// One map for the whole file: counts must accumulate across chunks.
		Map<String, Integer> counts = new HashMap<>();
		// Bytes of a word that started in one chunk and may continue in the next.
		ByteArrayOutputStream pendingWord = new ByteArrayOutputStream();
		try (MappedBiggerFileReader reader = new MappedBiggerFileReader(fileName, arraySize)) {
			while (reader.read() != -1) {
				wordCount(reader.getArray(), pendingWord, counts);
			}
		}
		// The file may end without a trailing delimiter.
		flushWord(pendingWord, counts);
		printByFrequency(counts);
	}

	/**
	 * Scans one chunk of raw bytes, completing words at every delimiter.
	 *
	 * <p>Splitting on bytes is safe for UTF-8 because every delimiter is an
	 * ASCII byte and ASCII bytes never occur inside a multi-byte sequence.
	 */
	private static void wordCount(byte[] chunk, ByteArrayOutputStream pendingWord,
			Map<String, Integer> counts) {
		for (byte b : chunk) {
			if (isDelimiter(b)) {
				flushWord(pendingWord, counts);
			} else {
				pendingWord.write(b);
			}
		}
	}

	/** Returns whether {@code b} ends a word. */
	private static boolean isDelimiter(byte b) {
		switch (b) {
			case ',':
			case ' ':
			case '.':
			case ';':
			case '!':
			case '?':
			case '\r':
			case '\n':
				return true;
			default:
				return false;
		}
	}

	/** Decodes the pending bytes as one word, counts it, and resets the buffer. */
	private static void flushWord(ByteArrayOutputStream pendingWord, Map<String, Integer> counts) {
		if (pendingWord.size() == 0) {
			return; // consecutive delimiters produce no word
		}
		String word = new String(pendingWord.toByteArray(), StandardCharsets.UTF_8)
				.toLowerCase(Locale.ROOT);
		pendingWord.reset();
		counts.merge(word, 1, Integer::sum);
	}

	/** Prints all entries ordered by descending count, then by word. */
	private static void printByFrequency(Map<String, Integer> counts) {
		Comparator<Map.Entry<String, Integer>> byCountThenWord = (left, right) -> {
			int byCount = Integer.compare(right.getValue(), left.getValue());
			return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
		};
		List<Map.Entry<String, Integer>> ranked = counts.entrySet().stream()
				.sorted(byCountThenWord)
				.collect(Collectors.toList());
		for (Map.Entry<String, Integer> entry : ranked) {
			System.out.println(entry.getKey() + " " + entry.getValue());
		}
	}

	/**
	 * Reads a file of any size in fixed-size chunks through an array of
	 * memory-mapped regions (a single mapping is limited to
	 * {@link Integer#MAX_VALUE} bytes).
	 *
	 * <p><b>Note:</b> although this class extends {@link InputStream},
	 * {@link #read()} does not return a single byte. It loads the next chunk
	 * and returns its length; the data is obtained with {@link #getArray()}.
	 * Do not wrap an instance in a {@code Reader} or another stream.
	 */
	public class MappedBiggerFileReader extends InputStream {
		private MappedByteBuffer[] mappedBufArray;
		/** Index of the mapped region currently being consumed. */
		private int count = 0;
		/** Number of mapped regions. */
		private int number;
		private FileInputStream fileIn;
		private long fileLength;
		private int arraySize;
		/** The chunk loaded by the most recent successful {@link #read()}. */
		private byte[] array;

		/**
		 * Opens {@code fileName} and maps its whole content read-only.
		 *
		 * @param fileName  path of the file to read
		 * @param arraySize maximum chunk length in bytes; must be positive
		 * @throws IOException              if the file cannot be opened or mapped
		 * @throws IllegalArgumentException if {@code arraySize} is not positive
		 */
		public MappedBiggerFileReader(String fileName, int arraySize) throws IOException {
			if (arraySize <= 0) {
				// A zero-length chunk would never advance and loop forever.
				throw new IllegalArgumentException("arraySize must be positive: " + arraySize);
			}
			this.arraySize = arraySize;
			this.fileIn = new FileInputStream(fileName);
			try {
				FileChannel fileChannel = fileIn.getChannel();
				this.fileLength = fileChannel.size();
				// Ceiling division: regions needed to cover the file.
				this.number = (int) ((fileLength + Integer.MAX_VALUE - 1) / Integer.MAX_VALUE);
				this.mappedBufArray = new MappedByteBuffer[number];
				long offset = 0;
				for (int i = 0; i < number; i++) {
					// Every region is full-sized except possibly the last one.
					long regionSize = Math.min((long) Integer.MAX_VALUE, fileLength - offset);
					mappedBufArray[i] = fileChannel.map(FileChannel.MapMode.READ_ONLY, offset, regionSize);
					offset += regionSize;
				}
			} catch (IOException | RuntimeException e) {
				// Do not leak the file handle when mapping fails.
				try {
					fileIn.close();
				} catch (IOException closeFailure) {
					e.addSuppressed(closeFailure);
				}
				throw e;
			}
		}

		/**
		 * Loads the next chunk into the internal array.
		 *
		 * @return the number of bytes loaded, or {@code -1} when the whole
		 *         file has been consumed
		 */
		@Override
		public int read() throws IOException {
			if (count >= number) {
				return -1;
			}
			MappedByteBuffer region = mappedBufArray[count];
			int remaining = region.remaining();
			int chunkSize = Math.min(remaining, arraySize);
			array = new byte[chunkSize];
			region.get(array);
			if (chunkSize == remaining) {
				count++; // region exhausted, continue with the next one
			}
			return chunkSize;
		}

		/** Closes the underlying file stream and drops the current chunk. */
		@Override
		public void close() throws IOException {
			fileIn.close();
			array = null;
		}

		/** Returns the chunk loaded by the last {@link #read()}, or {@code null}. */
		public byte[] getArray() {
			return array;
		}

		/** Returns the size of the file in bytes, as measured on opening. */
		public long getFileLength() {
			return fileLength;
		}
	}
}




1.文件字节流----测试代码如下:


import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
 
/**
 * Reads a file in fixed-size chunks through a {@link BufferedInputStream}.
 *
 * <p>Call {@link #read()} repeatedly; after each successful call the chunk is
 * available from {@link #getArray()}.
 */
public class StreamFileReader {
    private BufferedInputStream fileIn;
    private long fileLength;
    private int arraySize;
    /** The chunk loaded by the most recent successful {@link #read()}. */
    private byte[] array;

    /**
     * Opens {@code fileName} for chunked reading.
     *
     * @param fileName  path of the file to read
     * @param arraySize maximum chunk length in bytes, also used as the size of
     *                  the internal stream buffer; must be positive
     * @throws IOException              if the file cannot be opened
     * @throws IllegalArgumentException if {@code arraySize} is not positive
     */
    public StreamFileReader(String fileName, int arraySize) throws IOException {
        FileInputStream rawIn = new FileInputStream(fileName);
        try {
            // available() is an int estimate and cannot represent files
            // larger than 2 GB; the channel reports the real size as a long.
            this.fileLength = rawIn.getChannel().size();
            this.fileIn = new BufferedInputStream(rawIn, arraySize);
        } catch (IOException | RuntimeException e) {
            // Do not leak the file handle when construction fails.
            try {
                rawIn.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.arraySize = arraySize;
    }

    /**
     * Loads the next chunk into the internal array.
     *
     * @return the number of bytes loaded, or {@code -1} at end of file (the
     *         previously loaded chunk is left in place in that case)
     * @throws IOException if reading fails
     */
    public int read() throws IOException {
        byte[] buffer = new byte[arraySize];
        int bytes = fileIn.read(buffer);
        if (bytes == -1) {
            return -1;
        }
        if (bytes == buffer.length) {
            array = buffer; // full chunk: no trimming copy needed
        } else {
            array = new byte[bytes]; // expose exactly the bytes that were read
            System.arraycopy(buffer, 0, array, 0, bytes);
        }
        return bytes;
    }

    /** Closes the underlying stream and drops the current chunk. */
    public void close() throws IOException {
        fileIn.close();
        array = null;
    }

    /** Returns the chunk loaded by the last {@link #read()}, or {@code null}. */
    public byte[] getArray() {
        return array;
    }

    /** Returns the size of the file in bytes, as measured on opening. */
    public long getFileLength() {
        return fileLength;
    }

    /**
     * Times a full sequential read and prints the elapsed nanoseconds.
     *
     * @param args optional: {@code args[0]} is the file to read; defaults to
     *             the sample path used by the original benchmark
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : "/home/zfh/movie.mkv";
        StreamFileReader reader = new StreamFileReader(path, 65536);
        long start;
        long end;
        try {
            start = System.nanoTime();
            while (reader.read() != -1) {
                // drain the file; only the elapsed time matters
            }
            end = System.nanoTime();
        } finally {
            reader.close();
        }
        System.out.println("StreamFileReader: " + (end - start));
    }
}

2.文件通道----测试代码如下:
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
 
/**
 * Reads a file in fixed-size chunks through a {@link FileChannel} and a
 * reusable heap {@link ByteBuffer}.
 *
 * <p>Call {@link #read()} repeatedly; after each successful call the chunk is
 * available from {@link #getArray()}.
 */
public class ChannelFileReader {
    private FileInputStream fileIn;
    private FileChannel fileChannel;
    private ByteBuffer byteBuf;
    private long fileLength;
    private int arraySize;
    /** The chunk loaded by the most recent successful {@link #read()}. */
    private byte[] array;

    /**
     * Opens {@code fileName} for chunked reading.
     *
     * @param fileName  path of the file to read
     * @param arraySize maximum chunk length in bytes; must be positive
     * @throws IOException              if the file cannot be opened
     * @throws IllegalArgumentException if {@code arraySize} is not positive
     */
    public ChannelFileReader(String fileName, int arraySize) throws IOException {
        if (arraySize <= 0) {
            // A zero-capacity buffer makes every read return 0, never -1.
            throw new IllegalArgumentException("arraySize must be positive: " + arraySize);
        }
        this.arraySize = arraySize;
        this.byteBuf = ByteBuffer.allocate(arraySize);
        this.fileIn = new FileInputStream(fileName);
        try {
            this.fileChannel = fileIn.getChannel();
            this.fileLength = fileChannel.size();
        } catch (IOException | RuntimeException e) {
            // Do not leak the file handle when construction fails.
            try {
                fileIn.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Loads the next chunk into the internal array.
     *
     * @return the number of bytes loaded, or {@code -1} at end of file (the
     *         previously loaded chunk is left in place in that case)
     * @throws IOException if reading fails
     */
    public int read() throws IOException {
        int bytes = fileChannel.read(byteBuf);
        if (bytes == -1) {
            return -1;
        }
        byteBuf.flip(); // switch the buffer from filling to draining
        array = new byte[bytes];
        byteBuf.get(array);
        byteBuf.clear(); // make the whole buffer writable for the next call
        return bytes;
    }

    /** Closes the underlying stream (and its channel) and drops the current chunk. */
    public void close() throws IOException {
        fileIn.close();
        array = null;
    }

    /** Returns the chunk loaded by the last {@link #read()}, or {@code null}. */
    public byte[] getArray() {
        return array;
    }

    /** Returns the size of the file in bytes, as measured on opening. */
    public long getFileLength() {
        return fileLength;
    }

    /**
     * Times a full sequential read and prints the elapsed nanoseconds.
     *
     * @param args optional: {@code args[0]} is the file to read; defaults to
     *             the sample path used by the original benchmark
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : "/home/zfh/movie.mkv";
        ChannelFileReader reader = new ChannelFileReader(path, 65536);
        long start;
        long end;
        try {
            start = System.nanoTime();
            while (reader.read() != -1) {
                // drain the file; only the elapsed time matters
            }
            end = System.nanoTime();
        } finally {
            reader.close();
        }
        System.out.println("ChannelFileReader: " + (end - start));
    }
}

3.内存文件映射----测试代码如下:

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
 
/**
 * Reads a file of any size in fixed-size chunks through an array of
 * memory-mapped regions (a single mapping is limited to
 * {@link Integer#MAX_VALUE} bytes, so larger files need several).
 *
 * <p>Call {@link #read()} repeatedly; after each successful call the chunk is
 * available from {@link #getArray()}.
 */
public class MappedBiggerFileReader {
    private MappedByteBuffer[] mappedBufArray;
    /** Index of the mapped region currently being consumed. */
    private int count = 0;
    /** Number of mapped regions. */
    private int number;
    private FileInputStream fileIn;
    private long fileLength;
    private int arraySize;
    /** The chunk loaded by the most recent successful {@link #read()}. */
    private byte[] array;

    /**
     * Opens {@code fileName} and maps its whole content read-only.
     *
     * @param fileName  path of the file to read
     * @param arraySize maximum chunk length in bytes; must be positive
     * @throws IOException              if the file cannot be opened or mapped
     * @throws IllegalArgumentException if {@code arraySize} is not positive
     */
    public MappedBiggerFileReader(String fileName, int arraySize) throws IOException {
        if (arraySize <= 0) {
            // A zero-length chunk would never advance and loop forever.
            throw new IllegalArgumentException("arraySize must be positive: " + arraySize);
        }
        this.arraySize = arraySize;
        this.fileIn = new FileInputStream(fileName);
        try {
            FileChannel fileChannel = fileIn.getChannel();
            this.fileLength = fileChannel.size();
            // Ceiling division: regions needed to cover the file.
            this.number = (int) ((fileLength + Integer.MAX_VALUE - 1) / Integer.MAX_VALUE);
            this.mappedBufArray = new MappedByteBuffer[number];
            long offset = 0;
            for (int i = 0; i < number; i++) {
                // Every region is full-sized except possibly the last one.
                long regionSize = Math.min((long) Integer.MAX_VALUE, fileLength - offset);
                mappedBufArray[i] = fileChannel.map(FileChannel.MapMode.READ_ONLY, offset, regionSize);
                offset += regionSize;
            }
        } catch (IOException | RuntimeException e) {
            // Do not leak the file handle when mapping fails.
            try {
                fileIn.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Loads the next chunk into the internal array.
     *
     * @return the number of bytes loaded, or {@code -1} when the whole file
     *         has been consumed
     */
    public int read() throws IOException {
        if (count >= number) {
            return -1;
        }
        MappedByteBuffer region = mappedBufArray[count];
        int remaining = region.remaining();
        int chunkSize = Math.min(remaining, arraySize);
        array = new byte[chunkSize];
        region.get(array);
        if (chunkSize == remaining) {
            count++; // region exhausted, continue with the next one
        }
        return chunkSize;
    }

    /** Closes the underlying file stream and drops the current chunk. */
    public void close() throws IOException {
        fileIn.close();
        array = null;
    }

    /** Returns the chunk loaded by the last {@link #read()}, or {@code null}. */
    public byte[] getArray() {
        return array;
    }

    /** Returns the size of the file in bytes, as measured on opening. */
    public long getFileLength() {
        return fileLength;
    }

    /**
     * Times a full sequential read and prints the elapsed nanoseconds.
     *
     * @param args optional: {@code args[0]} is the file to read; defaults to
     *             the sample path used by the original benchmark
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : "/home/zfh/movie.mkv";
        MappedBiggerFileReader reader = new MappedBiggerFileReader(path, 65536);
        long start;
        long end;
        try {
            start = System.nanoTime();
            while (reader.read() != -1) {
                // drain the file; only the elapsed time matters
            }
            end = System.nanoTime();
        } finally {
            reader.close();
        }
        System.out.println("MappedBiggerFileReader: " + (end - start));
    }
}

运行结果比较 

用上面三种方法读取 1GB 文件,运行结果如下(数值为 System.nanoTime() 的差值,单位:纳秒)

StreamFileReader:  11494900386
ChannelFileReader: 11329346316
MappedFileReader:  11169097480


读取 10GB 文件,运行结果如下(单位同为纳秒)

StreamFileReader:       194579779394
ChannelFileReader:      190430242497
MappedBiggerFileReader: 186923035795

原文链接:https://blog.csdn.net/xiaofeng10330111/article/details/87958174

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值