Java IO类库之BufferedReader

最新推荐文章于 2023-12-29 01:57:08 发布

weixin_34185320

最新推荐文章于 2023-12-29 01:57:08 发布

阅读量485

点赞数

文章标签： java python

原文链接：https://my.oschina.net/zhangyq1991/blog/1930842

版权

2019独角兽企业重金招聘Python工程师标准>>>

一、BufferedReader的介绍

BufferedReader继承自Reader是字符缓冲输入流，它在内部开辟了一个缓冲区为底层字符输入流提供读取缓冲功能，从而可以提供字符、数组和行数据的高效读取。通常字符输入流Reader的每次读取请求都会触发对底层字符或字节输入流的读取，如果没有缓冲每次read和readLine调用都会导致从目标文件中读取字节，并转为字符后返回，这是非常低效的。我们通常建议使用BufferedReader包装read调用开销较大的Reader类，例如：

BufferedReader br = new BufferedReader(new FileReader("input.txt"));

二、BufferedReader类内部成员变量

package java.io;


import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

public class BufferedReader extends Reader {
    /** Underlying character input stream that this reader wraps. */
    private Reader in;
    /** Character array serving as the internal buffer. */
    private char[] cb;
    /** Number of valid characters currently held in the buffer. */
    private int nChars;
    /** Buffer index of the next character to be read. */
    private int nextChar;
    /** Value of {@code markedChar} when a mark was set but has since become invalid. */
    private static final int INVALIDATED = -2;
    /** Value of {@code markedChar} when no mark has been set. */
    private static final int UNMARKED = -1;
    /** Buffer index of the mark; starts out as {@code UNMARKED}. */
    private int markedChar = UNMARKED;
    /**
     * Limit on how many characters may be read while the mark is still
     * preserved. After reading up to or beyond this limit, an attempt to reset
     * the stream may fail. A limit larger than the buffer size causes a new
     * buffer of at least that size to be allocated, so large values should be
     * chosen with care.
     */
    private int readAheadLimit = 0;
    /** Whether the next character should be skipped if it is a line feed. */
    private boolean skipLF = false;

    /** The value {@code skipLF} had at the moment the mark was set. */
    private boolean markedSkipLF = false;
    /** Default capacity of the character buffer; a constant, hence final. */
    private static final int defaultCharBufferSize = 8192;
    /** Default expected number of characters per line; a constant, hence final. */
    private static final int defaultExpectedLineLength = 80;

}

三、BufferedReader源码分析

1 - 构造函数

    /**
     * Creates a buffering reader over {@code in} whose internal character
     * buffer holds {@code sz} characters.
     *
     * @param in the underlying character input stream
     * @param sz capacity of the character buffer; must be positive
     * @throws IllegalArgumentException if {@code sz <= 0}
     */
    public BufferedReader(Reader in, int sz) {
        super(in);
        if (sz <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        this.in = in;
        this.cb = new char[sz];
        // The buffer starts out empty: nothing buffered and nothing consumed.
        this.nChars = 0;
        this.nextChar = 0;
    }

    /**
     * Creates a buffering reader over {@code in} using the default buffer
     * size, {@code defaultCharBufferSize}.
     *
     * @param in the underlying character input stream
     */
    public BufferedReader(Reader in) {
        this(in, defaultCharBufferSize);
    }

BufferedReader构造函数简单来讲就做了几件事：1）基于方法入参，指定底层字符输入流；2）校验缓冲区大小（sz必须大于0，否则抛出IllegalArgumentException），并创建指定或默认大小的字符缓冲区；3）将成员变量nextChar（下一次字符读取位置）和nChars（当前缓冲区有效字符个数）初始化为0。

2 - int read()方法 - 读取单个字符

    /**
     * Reads a single character.
     *
     * @return the character read, as an int, or -1 if the end of the stream
     *         has been reached
     * @throws IOException if the stream is closed or refilling the buffer fails
     */
    public int read() throws IOException {
        synchronized (lock) {
            ensureOpen();
            while (true) {
                // Buffer fully consumed: refill it. If the refill produced
                // nothing, the underlying stream is at its end.
                if (nextChar >= nChars) {
                    fill();
                    if (nextChar >= nChars) {
                        return -1;
                    }
                }
                // skipLF is set by readLine after a line ended with '\r'. It
                // applies only to the very next character, so it is cleared
                // here regardless of whether that character is a '\n'.
                boolean dropLineFeed = skipLF && cb[nextChar] == '\n';
                skipLF = false;
                if (dropLineFeed) {
                    nextChar++;
                    continue;
                }
                char ch = cb[nextChar];
                nextChar++;
                return ch;
            }
        }
    }

    /**
     * Verifies that this reader has not been closed.
     *
     * @throws IOException if the underlying stream reference {@code in} is
     *         {@code null}, which is the state left behind by {@code close()}
     */
    private void ensureOpen() throws IOException {
        if (in != null) {
            return;
        }
        throw new IOException("Stream closed");
    }

read方法首先检测流状态，接下来在for循环内部先基于nextChar>=nChars判断缓冲区中的数据是否已经读完，若已读完则调用fill()方法重新填充缓冲区，我们进入该方法源码看下做了什么：

    /**
     * Refills the buffer from the underlying stream, preserving the characters
     * between the mark and the current position while the mark is still valid.
     *
     * <p>On return, if at least one character was read, {@code nextChar}
     * points at the first new character and {@code nChars} at the end of the
     * valid data. If the underlying read reports end of stream, those two
     * fields are not updated by the final step.
     *
     * @throws IOException if the underlying stream throws it
     */
    private void fill() throws IOException {
        // Index at which freshly read characters are stored. It stays 0 when
        // there is no mark, or when the mark has just been invalidated.
        int writePos = 0;

        if (markedChar > UNMARKED) {
            // Number of characters consumed since the mark was set.
            int kept = nextChar - markedChar;
            if (kept >= readAheadLimit) {
                // Read past the read-ahead limit: the mark can no longer be honored.
                markedChar = INVALIDATED;
                readAheadLimit = 0;
            } else {
                // Keep the marked region at the front of the buffer. Reuse the
                // current array when it can hold readAheadLimit characters;
                // otherwise grow to exactly readAheadLimit.
                char[] target = (readAheadLimit <= cb.length) ? cb : new char[readAheadLimit];
                System.arraycopy(cb, markedChar, target, 0, kept);
                cb = target;
                markedChar = 0;
                writePos = kept;
                nChars = kept;
                nextChar = kept;
            }
        }

        // Keep asking the underlying stream until it returns something other
        // than zero characters (either data or an end-of-stream indication).
        int count;
        do {
            count = in.read(cb, writePos, cb.length - writePos);
        } while (count == 0);

        if (count > 0) {
            nChars = writePos + count;
            nextChar = writePos;
        }
    }

fill方法主要作用是从底层字符输入流in读取字符数据填充到缓冲区中，填充过程中还要考虑是否标记以及标记时设置的readAheadLimit（标记之后继续读取多少字符个数可以重新reset到当前位置重新读取的限制）等因素，简单总结下fill方法的基本逻辑如下：

1）首先确定缓冲区数组填充从底层字符输入流读取的字符数据的起始位置dst，这里分以下情况下处理：

若流未被标记则dst=0从缓冲区数组起始位置开始覆盖填充；

若上次标记之后继续读取的字符个数达到或超过readAheadLimit限制，则标记失效（markedChar被置为INVALIDATED，readAheadLimit重置为0），dst=0也从缓冲区数组起始位置开始覆盖填充；

若上次标记之后继续读取的字符个数小于readAheadLimit限制，则需要保留标记位置markedChar之后已读取的字符数据：当readAheadLimit不超过缓冲区数组长度时，直接把这部分数据移动到当前缓冲区数组的开头；当readAheadLimit大于缓冲区数组长度时，则将缓冲区扩容到readAheadLimit大小，并把这部分数据复制到新缓冲区的开头。两种情况下都有dst=delta(nextChar-markedChar)，同时将标记位置markedChar重置为0，nextChar和nChars重置为delta；

2）底层字符输入流in尝试读取一段字符数据填满缓冲区剩余部分（即dst开始到缓冲区数组结束部分）；

3）更新缓冲区字符个数nChars和下一个字符读取位置nextChar。

BufferedReader类的read()方法逻辑较为简单，总结基本过程：

1）检测流状态，若流已关闭（底层输入流in为null）则抛出IO异常；

2）循环读取缓冲区数组，若当前缓冲区字符数据已读完则从底层字符输入流读取一段字符数据填充到缓冲区（尝试读取缓冲区大小个数的字符填满缓冲区，但不保证底层字符输入流读取的数据能填满缓冲区可能提前到达流末尾），若到达流末尾则就是返回-1标识已到达流末尾；

3）判断是否设置跳过换行符，若设置则首次读取到换行符直接跳过继续往下读取；

4）返回从字符缓冲区中读取到的字符；

3 - String readLine()方法 - 读取一行字符数据

    /**
     * Reads one line of text by delegating to {@code readLine(false)}, i.e.
     * without forcing a leading line feed to be skipped.
     *
     * @return whatever {@code readLine(false)} returns
     * @throws IOException if {@code readLine(false)} throws it
     */
    public String readLine() throws IOException {
        return readLine(false);
    }

方法实现在readLine(boolean)我们进入该方法源码：

    /**
     * Reads one line of text. A line ends at a line feed ('\n'), at a carriage
     * return ('\r'), or at the end of the stream. When a line ends with '\r',
     * {@code skipLF} is set so that a directly following '\n' is treated as
     * part of the same terminator by the next read.
     *
     * @param ignoreLF if true, a '\n' found at the current position is skipped
     *        before the line is read
     * @return the contents of the line without its terminator, or {@code null}
     *         if the end of the stream is reached without any character
     *         having been collected
     * @throws IOException if the stream is closed or refilling the buffer fails
     */
    String readLine(boolean ignoreLF) throws IOException {
        // Accumulates the pieces of a line that spans several buffer fills.
        // It is a local that is only touched while holding the lock, so the
        // unsynchronized StringBuilder is sufficient.
        StringBuilder s = null;
        int startChar;

        synchronized (lock) {
            ensureOpen();
            // A leading '\n' is dropped either on request or because the
            // previous line ended with '\r'.
            boolean omitLF = ignoreLF || skipLF;

            for (;;) {
                // Buffer fully consumed: refill it.
                if (nextChar >= nChars)
                    fill();
                // Still nothing to read: end of stream. Return what has been
                // collected so far, or null if that is nothing.
                if (nextChar >= nChars) {
                    if (s != null && s.length() > 0)
                        return s.toString();
                    else
                        return null;
                }
                boolean eol = false;
                char c = 0;
                int i;

                // Skip a leftover '\n' if necessary. The skip applies to the
                // first character only, so both flags are cleared afterwards.
                if (omitLF && (cb[nextChar] == '\n'))
                    nextChar++;
                skipLF = false;
                omitLF = false;

                // Scan the buffered characters for a line terminator.
                for (i = nextChar; i < nChars; i++) {
                    c = cb[i];
                    if ((c == '\n') || (c == '\r')) {
                        eol = true;
                        break;
                    }
                }

                startChar = nextChar;
                nextChar = i;

                if (eol) {
                    String str;
                    if (s == null) {
                        // The whole line was inside the current buffer; build
                        // the string directly without an accumulator.
                        str = new String(cb, startChar, i - startChar);
                    } else {
                        s.append(cb, startChar, i - startChar);
                        str = s.toString();
                    }
                    // Step over the terminator itself.
                    nextChar++;
                    if (c == '\r') {
                        skipLF = true;
                    }
                    return str;
                }

                // No terminator in this buffer: stash what was scanned and
                // continue with the next fill.
                if (s == null)
                    s = new StringBuilder(defaultExpectedLineLength);
                s.append(cb, startChar, i - startChar);
            }
        }
    }

基于readLine(boolean)方法源码我们可以总结readLine()方法的基本逻辑归纳如下：

1）检测当前流状态若已经关闭则抛出IO异常；

2）循环读取缓冲区中的字符数据，若缓冲区中的数据已经读完则调用fill方法重新填充缓冲区后继续读取，读取结束的条件是到达流末尾，或者碰到换行符“\n”或回车符“\r”；若行以“\r”结束，则设置skipLF标记，以便下次读取时跳过紧随其后的“\n”。

4 - 其他成员方法

    /**
     * Reads characters into a portion of {@code cbuf}, starting at index
     * {@code off} and storing at most {@code len} characters.
     *
     * @param cbuf destination array
     * @param off index in {@code cbuf} at which to start storing characters
     * @param len maximum number of characters to read
     * @return the number of characters stored, {@code 0} if {@code len} is
     *         zero, or the non-positive value returned by the first
     *         {@code read1} call
     * @throws IndexOutOfBoundsException if {@code off} and {@code len} do not
     *         describe a valid range of {@code cbuf}
     * @throws IOException if the stream is closed or a read fails
     */
    public int read(char cbuf[], int off, int len) throws IOException {
        synchronized (lock) {
            ensureOpen();

            // The last clause catches int overflow of off + len.
            boolean badRange = (off < 0) || (off > cbuf.length) || (len < 0)
                    || ((off + len) > cbuf.length) || ((off + len) < 0);
            if (badRange) {
                throw new IndexOutOfBoundsException();
            }
            if (len == 0) {
                return 0;
            }

            // NOTE(review): read1 is not part of this excerpt; it is assumed to
            // make one read attempt and return the count read, or a
            // non-positive value when nothing more is available. Confirm
            // against its definition.
            int total = read1(cbuf, off, len);
            if (total <= 0) {
                return total;
            }
            // Keep topping up while the request is not satisfied and the
            // underlying input stream reports that it is ready.
            while (total < len && in.ready()) {
                int more = read1(cbuf, off + total, len - total);
                if (more <= 0) {
                    break;
                }
                total += more;
            }
            return total;
        }
    }

    /**
     * Skips up to {@code n} characters.
     *
     * @param n number of characters to skip; must not be negative
     * @return the number of characters actually skipped, which is less than
     *         {@code n} if the end of the stream is reached first
     * @throws IllegalArgumentException if {@code n} is negative
     * @throws IOException if the stream is closed or refilling the buffer fails
     */
    public long skip(long n) throws IOException {
        if (n < 0L) {
            throw new IllegalArgumentException("skip value is negative");
        }
        synchronized (lock) {
            ensureOpen();
            long remaining = n;
            while (remaining > 0) {
                if (nextChar >= nChars) {
                    fill();
                }
                if (nextChar >= nChars) {
                    // End of stream: nothing more to skip.
                    break;
                }
                // A '\n' directly after a '\r' line ending is dropped without
                // counting toward the requested amount.
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                    }
                }
                long available = nChars - nextChar;
                if (remaining > available) {
                    // Consume everything buffered and go around for a refill.
                    remaining -= available;
                    nextChar = nChars;
                    continue;
                }
                // The rest of the request fits inside the buffered data.
                nextChar += remaining;
                remaining = 0;
                break;
            }
            return n - remaining;
        }
    }

    /**
     * Reports whether this reader is ready to be read: true when the buffer
     * still holds unread characters, or when the underlying character input
     * stream reports that it is ready.
     *
     * @return {@code true} if unread characters are buffered or
     *         {@code in.ready()} returns {@code true}
     * @throws IOException if the stream is closed or the underlying stream
     *         throws it
     */
    public boolean ready() throws IOException {
        synchronized (lock) {
            ensureOpen();

            /*
             * If newline needs to be skipped and the next char to be read
             * is a newline character, then just skip it right away.
             */
            if (skipLF) {
                /* Note that in.ready() will return true if and only if the next
                 * read on the stream will not block.
                 */
                if (nextChar >= nChars && in.ready()) {
                    fill();
                }
                // Only clear skipLF once a character is actually available to
                // inspect; otherwise the pending skip is kept for later.
                if (nextChar < nChars) {
                    if (cb[nextChar] == '\n')
                        nextChar++;
                    skipLF = false;
                }
            }
            return (nextChar < nChars) || in.ready();
        }
    }

    /**
     * Tells whether this stream supports the mark() operation.
     *
     * @return always {@code true}
     */
    public boolean markSupported() {
        return true;
    }

    /**
     * Marks the current read position in the stream. A later call to
     * {@code reset()} attempts to reposition the stream to this point.
     *
     * @param readAheadLimit limit on the number of characters that may be read
     *        while still preserving the mark; must not be negative
     * @throws IllegalArgumentException if {@code readAheadLimit < 0}
     * @throws IOException if the stream is closed
     */
    public void mark(int readAheadLimit) throws IOException {
        if (readAheadLimit < 0) {
            throw new IllegalArgumentException("Read-ahead limit < 0");
        }
        synchronized (lock) {
            ensureOpen();
            // Snapshot both the position and the pending line-feed state so
            // that reset() can restore them together.
            markedSkipLF = skipLF;
            markedChar = nextChar;
            this.readAheadLimit = readAheadLimit;
        }
    }

    /**
     * Repositions the stream to the most recent mark.
     *
     * @throws IOException if the stream is closed, was never marked, or the
     *         mark has been invalidated
     */
    public void reset() throws IOException {
        synchronized (lock) {
            ensureOpen();
            if (markedChar < 0) {
                // Negative values are the two sentinels; tell them apart so
                // the message explains why the reset cannot happen.
                String reason;
                if (markedChar == INVALIDATED) {
                    reason = "Mark invalid";
                } else {
                    reason = "Stream not marked";
                }
                throw new IOException(reason);
            }
            nextChar = markedChar;
            skipLF = markedSkipLF;
        }
    }

    /**
     * Closes this reader. If the underlying stream reference is still set, its
     * {@code close()} is invoked, and the references to the underlying stream
     * and to the buffer array are cleared afterwards even if that call throws.
     * Calling this method on an already closed reader does nothing.
     *
     * @throws IOException if closing the underlying stream throws it
     */
    public void close() throws IOException {
        synchronized (lock) {
            if (in != null) {
                try {
                    in.close();
                } finally {
                    in = null;
                    cb = null;
                }
            }
        }
    }

    /**
     * Returns a sequential {@code Stream} whose elements are the lines
     * obtained by calling {@code readLine()} on this reader. Lines are read
     * on demand as the stream's iterator is advanced. An {@code IOException}
     * raised while reading is rethrown wrapped in an
     * {@code UncheckedIOException}.
     *
     * @return a stream of the lines read from this reader
     */
    public Stream<String> lines() {
        Iterator<String> lineIterator = new Iterator<String>() {
            // Line fetched by hasNext() but not yet handed out by next().
            String nextLine = null;

            @Override
            public boolean hasNext() {
                if (nextLine == null) {
                    try {
                        nextLine = readLine();
                    } catch (IOException e) {
                        throw new UncheckedIOException(e);
                    }
                }
                return nextLine != null;
            }

            @Override
            public String next() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                String line = nextLine;
                nextLine = null;
                return line;
            }
        };
        int characteristics = Spliterator.ORDERED | Spliterator.NONNULL;
        Spliterator<String> lineSpliterator =
                Spliterators.spliteratorUnknownSize(lineIterator, characteristics);
        return StreamSupport.stream(lineSpliterator, false);
    }

转载于:https://my.oschina.net/zhangyq1991/blog/1930842