Java NIO 读取大文本文件

1. NIOFileReader(处理缓存的逻辑)

/**
 * Reads a file through a {@link FileChannel} in fixed-size chunks, splits the
 * bytes into lines on the {@code \n} byte and hands every line to a
 * {@link ReaderListen}.
 *
 * <p>Bytes are only decoded once a complete line has been assembled, so a
 * multi-byte character that straddles two chunks is decoded intact.
 */
public class NIOFileReader {
    // Number of bytes requested from the channel per read; 1024 by default
    private int bufSize = 1024;
    // Line terminator byte
    private byte key = (byte) '\n';
    // Number of newline-terminated lines delivered so far (never reset by this class)
    private long lineNum = 0;
    // Charset name used to decode each line; utf-8 by default
    private String encode = "utf-8";
    // Receiver of the decoded lines (the business logic)
    private ReaderListen readerListen;

    /**
     * Creates a reader that decodes lines as utf-8.
     *
     * @param readerListen receiver of the decoded lines
     */
    public NIOFileReader(ReaderListen readerListen) {
        this.readerListen = readerListen;
    }

    /**
     * Creates a reader that decodes lines with the given charset.
     *
     * @param readerListen receiver of the decoded lines
     * @param encode       charset name passed to {@code new String(byte[], int, int, String)}
     */
    public NIOFileReader(ReaderListen readerListen, String encode) {
        this.encode = encode;
        this.readerListen = readerListen;
    }

    /**
     * Reads the file and calls {@code readerListen.outLine} once per line.
     *
     * <p>Every newline-terminated line is delivered trimmed, with an
     * incremented line number and {@code over == false}. Whatever follows the
     * last newline (possibly the empty string) is delivered last, trimmed,
     * with the unchanged line number and {@code over == true}.
     *
     * @param fullPath path of the file to read
     * @throws FileNotFoundException if the file does not exist
     * @throws Exception             any I/O or decoding failure, and anything thrown by the listener
     */
    public void readFileByLine(String fullPath) throws Exception {
        File fin = new File(fullPath);
        if (!fin.exists()) {
            throw new FileNotFoundException("没有找到文件:" + fullPath);
        }
        FileChannel fcin = new RandomAccessFile(fin, "r").getChannel();
        try {
            ByteBuffer rBuffer = ByteBuffer.allocate(bufSize);
            // Bytes read so far that are not yet terminated by a newline
            byte[] pending = new byte[0];
            while (fcin.read(rBuffer) != -1) {
                // Switch the buffer to draining mode; remaining() is the number of bytes just read
                rBuffer.flip();
                int rSize = rBuffer.remaining();
                // Prepend the unfinished tail of the previous chunk to the new bytes
                byte[] data = new byte[pending.length + rSize];
                System.arraycopy(pending, 0, data, 0, pending.length);
                rBuffer.get(data, pending.length, rSize);
                rBuffer.clear();

                int fromIndex = 0;
                int endIndex;
                // Emit every complete line in this chunk
                while ((endIndex = indexOf(data, fromIndex)) != -1) {
                    String line = new String(data, fromIndex, endIndex - fromIndex, encode);
                    // NOTE(review): format(...) is not defined in this class; presumably
                    // provided elsewhere - confirm, or remove the call.
                    line = format(line);
                    lineNum++;
                    readerListen.outLine(line.trim(), lineNum, false);
                    fromIndex = endIndex + 1;
                }
                // Carry over the unterminated tail, even when the chunk held no
                // newline at all; otherwise lines longer than bufSize lose data.
                pending = substring(data, fromIndex, data.length);
            }
            // Deliver the remainder as the final line and signal completion
            String lineStr = new String(pending, 0, pending.length, encode);
            readerListen.outLine(lineStr.trim(), lineNum, true);
        } finally {
            fcin.close();
        }
    }

    /** Returns a copy of {@code src[fromIndex, endIndex)}. */
    private byte[] substring(byte[] src, int fromIndex, int endIndex) {
        int size = endIndex - fromIndex;
        byte[] ret = new byte[size];
        System.arraycopy(src, fromIndex, ret, 0, size);
        return ret;
    }

    /** Returns the index of the first line terminator at or after {@code fromIndex}, or -1. */
    private int indexOf(byte[] src, int fromIndex) {
        for (int i = fromIndex; i < src.length; i++) {
            if (src[i] == key) {
                return i;
            }
        }
        return -1;
    }
}

2. ReaderListen(缓存读取到的行,并按批次交给业务逻辑输出)

/**
 * Collects the lines passed to {@link #outLine} and hands them to
 * {@link #output} in batches.
 */
public abstract class ReaderListen {
    // Batch size in lines; 500 by default
    private int readColNum = 500;
    // Incremented once per batch, just before output is called; starts at 0
    private int count = 0;
    // Lines collected since the last batch was handed out
    private List<String> list = new ArrayList<String>();

    protected void setReadColNum(int readColNum) {
        this.readColNum = readColNum;
    }

    protected int getCount() {
        return count;
    }

    /**
     * Buffers one line and emits the buffered batch when it is due.
     *
     * <p>A batch is emitted when {@code over} is true, or when
     * {@code lineNum} is a multiple of the configured batch size.
     *
     * @param lineStr the line that was read; ignored when {@code null}
     * @param lineNum line number reported by the reader
     * @param over    whether reading has finished
     * @throws Exception whatever {@link #output} throws
     */
    public void outLine(String lineStr, long lineNum, boolean over) throws Exception {
        if (lineStr != null) {
            list.add(lineStr);
        }
        // The modulo is only evaluated while reading is still in progress
        if (over || lineNum % readColNum == 0) {
            emitBatch();
        }
    }

    // Bumps the batch counter, passes the buffered lines to output, then empties the buffer
    private void emitBatch() throws Exception {
        count += 1;
        output(list);
        list.clear();
    }

    /**
     * Receives one batch of lines. The list is cleared right after this
     * method returns.
     *
     * @param stringList the buffered lines
     * @throws Exception if the batch cannot be processed
     */
    public abstract void output(List<String> stringList) throws Exception;

}

3. ReadText(设置输入输出文件及读取参数)

/**
 * Demo entry point: reads the input file line by line and appends the lines,
 * batch by batch, to {@code result/poiName.text} encoded as UTF-8.
 *
 * @param args unused
 * @throws Exception if reading the input or writing the output fails
 */
public static void main(String[] args) throws Exception {
    String filename = "C:\\Users\\Administrator\\Desktop\\queryResult\\train\\poiNameResult";
    // Single source of truth for the batch size used by the listener and the log message
    final int batchSize = 50000;
    ReaderListen readerListener = new ReaderListen() {
        @Override
        public void output(List<String> stringList) throws Exception {
            int count = getCount();
            File file = new File("result/poiName.text");
            // Make sure the target directory exists; if it cannot be created,
            // opening the stream below fails with an exception.
            File parent = file.getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }
            // Append mode creates the file when it is missing. BufferedWriter is
            // used directly so that write failures surface as exceptions.
            BufferedWriter bw = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(file, true), "UTF-8"));
            try {
                for (String s : stringList) {
                    bw.write(String.valueOf(s));
                    bw.newLine();
                }
            } finally {
                // Closing the outermost writer flushes and closes the whole chain
                bw.close();
            }
            System.out.println("输出了第" + count + "个" + batchSize + "行");
        }

    };
    readerListener.setReadColNum(batchSize);
    NIOFileReader nioFileReader = new NIOFileReader(readerListener, "utf-8");
    nioFileReader.readFileByLine(filename);
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值