java 快速读取大文本文件_Java用7000万行文本读取大文本文件-CSDN博客

本文链接：https://blog.csdn.net/weixin_35870524/article/details/114591929

本文档介绍了一种使用Java高效地从大型文本文件中读取行的方法。通过使用SeekableByteChannel和ByteBuffer，代码逐行扫描文件，查找换行符以确定行结束位置。在找到行结束符后，该方法将读取并返回该行的内容。这种方法适用于处理大文件，且能够处理不同操作系统的换行符差异。

摘要由CSDN通过智能技术生成

我遇到过类似的问题，不过我只需要文件中的原始字节。我阅读了各个答案中提供的链接，最终参照 Evgeniy 的答案中第 5 个链接的思路写了一个类似的实现。他们没有开玩笑，这确实需要不少代码。

基本前提是每一行文本的长度都是未知的。我从一个 SeekableByteChannel 开始，将数据读入 ByteBuffer，然后遍历缓冲区查找行结束符（EOL）。如果一行内容跨越了多次读取（即存在“遗留”数据），就累加一个偏移计数器；找到行结束符后，再把 SeekableByteChannel 的位置移回该行的起始处，一次性读取整行。

代码比较冗长……但它确实可用。对我的需求来说它已经足够快，不过我相信还有不少可以改进的地方。

下面的 process 方法已被精简到最基本的内容，仅用于启动对文件的读取。

private long startOffset; private long endOffset; private SeekableByteChannel sbc; private final ByteBuffer buffer = ByteBuffer.allocateDirect(1024); public void process() throws IOException { startOffset = 0; sbc = Files.newByteChannel(FILE, EnumSet.of(READ)); byte[] message = null; while((message = readRecord()) != null) { // do something } } public byte[] readRecord() throws IOException { endOffset = startOffset; boolean eol = false; boolean carryOver = false; byte[] record = null; while(!eol) { byte data; buffer.clear(); final int bytesRead = sbc.read(buffer); if(bytesRead == -1) { return null; } buffer.flip(); for(int i = 0; i < bytesRead && !eol; i++) { data = buffer.get(); if(data == '\r' || data == '\n') { eol = true; endOffset += i; if(carryOver) { final int messageSize = (int)(endOffset - startOffset); sbc.position(startOffset); final ByteBuffer tempBuffer = ByteBuffer.allocateDirect(messageSize); sbc.read(tempBuffer); tempBuffer.flip(); record = new byte[messageSize]; tempBuffer.get(record); } else { record = new byte[i]; // Need to move the buffer position back since the get moved it forward buffer.position(0); buffer.get(record, 0, i); } // Skip past the newline characters if(isWindowsOS()) { startOffset = (endOffset + 2); } else { startOffset = (endOffset + 1); } // Move the file position back sbc.position(startOffset); } } if(!eol && sbc.position() == sbc.size()) { // We have hit the end of the file, just take all the bytes record = new byte[bytesRead]; eol = true; buffer.position(0); buffer.get(record, 0, bytesRead); } else if(!eol) { // The EOL marker wasn't found, continue the loop carryOver = true; endOffset += bytesRead; } } // System.out.println(new String(record)); return record; }