Spark源码分析之ReadAheadInputStream

概述

ReadAheadInputStream实现了从当前buffer读取的data耗尽时,切换到另外一个buffer读取数据,并启动任务从底层输入流异步预读data,放入耗尽的buffer中。它通过2个buffer来完成——active buffer和read ahead buffer。在调用read()方法时会返回active buffer中的数据。而read ahead buffer用于从底层输入流异步读取数据。通过切换这2个buffer,我们可以在active buffer耗尽时开始从read ahead buffer读取数据,无需阻塞在磁盘IO上。

线程池中的后台线程从底层输入流异步读取数据到read ahead buffer,相当于写线程(writer)。

当前线程(调用read()的线程)从active buffer读取数据,相当于读线程(reader);只有在active buffer耗尽、两个buffer交换之后,原来的read ahead buffer才会作为新的active buffer被读取。

在swap buffer、触发async reading、获取async state时需要加锁。

async read如果一定要填满read ahead buffer才返回,可能会增加reader的等待延迟。因此用一个AtomicBoolean标志(isWaiting)记录当前是否有reader正在等待数据:一旦有reader在等待,async read在读到部分数据后就可以提前返回。

成员变量

从概述可以得出,我们至少需要以下成员变量:activeBuffer、readAheadBuffer、underlyingInputStream等,同时还需要用于并发的相关成员变量。

  /**
   * Lock taken around buffer swaps, the triggering of async reads and access to the async-read
   * state. Declared final because {@code asyncReadComplete} is created from this exact instance.
   */
  private final ReentrantLock stateChangeLock = new ReentrantLock();

  /** Buffer that {@code read} copies data out of. */
  @GuardedBy("stateChangeLock")
  private ByteBuffer activeBuffer;

  /** Buffer that the asynchronous read task fills from the underlying input stream. */
  @GuardedBy("stateChangeLock")
  private ByteBuffer readAheadBuffer;

  private final InputStream underlyingInputStream;

  // Whether there is a reader waiting for data. The async read task polls this flag so that it
  // can return early instead of filling the whole read ahead buffer.
  private final AtomicBoolean isWaiting = new AtomicBoolean(false);

  private final ExecutorService executorService =
      ThreadUtils.newDaemonSingleThreadExecutor("read-ahead");

  // True while an async read is in progress. This is the condition that
  // waitForAsyncReadComplete() loops on.
  @GuardedBy("stateChangeLock")
  private boolean readInProgress;

  // Condition on which a reader blocks until the async read has completed.
  private final Condition asyncReadComplete = stateChangeLock.newCondition();

read方法

/**
 * Copies up to {@code len} bytes from the active buffer into {@code b}, starting at
 * {@code offset}. When the active buffer is exhausted it is first replaced by the read ahead
 * buffer, and a new asynchronous read is triggered for the buffer that was just emptied.
 *
 * @param b destination array
 * @param offset index in {@code b} at which the first byte is stored
 * @param len maximum number of bytes to copy
 * @return the number of bytes copied, {@code 0} if {@code len} is zero, or {@code -1} when no
 *     data is available and the end of the stream has been reached
 * @throws IndexOutOfBoundsException if {@code offset} or {@code len} is negative, or
 *     {@code len} exceeds {@code b.length - offset}
 * @throws IOException if waiting for, or starting, the asynchronous read fails
 */
@Override
public int read(byte[] b, int offset, int len) throws IOException {
  // Validate the arguments up front.
  if (offset < 0 || len < 0 || len > b.length - offset) {
    throw new IndexOutOfBoundsException();
  }
  if (len == 0) {
    return 0;
  }

  // Nothing left in the active buffer: try to swap in the read ahead buffer.
  if (!activeBuffer.hasRemaining() && !refillActiveBuffer()) {
    return -1;
  }

  // Hand out no more than what the active buffer currently holds.
  final int count = Math.min(len, activeBuffer.remaining());
  activeBuffer.get(b, offset, count);
  return count;
}

/**
 * Under {@code stateChangeLock}, waits for any in-flight async read, swaps the read ahead buffer
 * in place of the exhausted active buffer and triggers the next async read.
 *
 * @return {@code false} if the read ahead buffer is still empty and the end of the stream has
 *     been reached, {@code true} once the buffers have been swapped
 */
private boolean refillActiveBuffer() throws IOException {
  stateChangeLock.lock();
  try {
    // Returns immediately when no async read is in progress (for example on the first call,
    // before any async read has been triggered).
    waitForAsyncReadComplete();

    if (!readAheadBuffer.hasRemaining()) {
      // Both buffers are empty: this is the first read, or the previous async read produced no
      // data. Trigger a read and block until it has finished.
      readAsync();
      waitForAsyncReadComplete();
      if (isEndOfStream()) {
        return false;
      }
    }

    // Swap the newly read read ahead buffer in place of the empty active buffer.
    swapBuffers();
    // After swapping buffers, trigger another async read for the read ahead buffer.
    readAsync();
    return true;
  } finally {
    stateChangeLock.unlock();
  }
}

readAsync方法

/**
 * Read data from underlyingInputStream to readAheadBuffer asynchronously.
 *
 * <p>Does nothing if the end of the stream has already been reached or an async read is already
 * in progress. Otherwise the read ahead buffer is emptied, {@code readInProgress} is set and a
 * task is submitted to {@code executorService}. The task fills the buffer's backing array,
 * records end-of-stream or the failure, clears {@code readInProgress} and signals the waiting
 * reader.
 *
 * @throws IOException if {@code checkReadException()} reports a failure
 */
private void readAsync() throws IOException {
  final byte[] arr;
  stateChangeLock.lock();
  try {
    // Fetch the backing array inside the try block so that the lock is released even if
    // array() throws (for example for a buffer without an accessible backing array).
    arr = readAheadBuffer.array();
    if (endOfStream || readInProgress) {
      // Nothing more to read, or an async read is already running.
      return;
    }
    checkReadException();
    // position(0) followed by flip() leaves position == limit == 0, i.e. an empty buffer.
    readAheadBuffer.position(0);
    readAheadBuffer.flip();
    // Mark that an async read is about to be handed to the executor.
    readInProgress = true;
  } finally {
    stateChangeLock.unlock();
  }

  executorService.execute(new Runnable() {

    @Override
    public void run() {
      stateChangeLock.lock();
      try {
        if (isClosed) {
          readInProgress = false;
          return;
        }
        // Flip this so that the close method will not close the underlying input stream when we
        // are reading.
        isReading = true;
      } finally {
        stateChangeLock.unlock();
      }

      // Please note that it is safe to release the lock and read into the read ahead buffer
      // because either of following two conditions will hold - 1. The active buffer has
      // data available to read so the reader will not read from the read ahead buffer.
      // 2. This is the first time read is called or the active buffer is exhausted,
      // in that case the reader waits for this async read to complete.
      // So there is no race condition in both the situations.
      int read = 0;
      int off = 0, len = arr.length;
      Throwable exception = null;
      try {
        // try to fill the read ahead buffer.
        // if a reader is waiting, possibly return early.
        do {
          read = underlyingInputStream.read(arr, off, len);
          if (read <= 0) {
            // End of stream, or nothing was read: stop filling.
            break;
          }
          off += read;
          len -= read; // space still free in the backing array
        } while (len > 0 && !isWaiting.get());
      } catch (Throwable ex) {
        exception = ex;
        if (ex instanceof Error) {
          // `readException` may not be reported to the user. Rethrow Error to make sure at least
          // The user can see Error in UncaughtExceptionHandler.
          throw (Error) ex;
        }
      } finally {
        stateChangeLock.lock();
        try {
          // Expose exactly the bytes that were read.
          readAheadBuffer.limit(off);
          if (read < 0 || (exception instanceof EOFException)) {
            endOfStream = true;
          } else if (exception != null) {
            readAborted = true;
            readException = exception;
          }
          // The async read is finished; wake up a reader blocked waiting for it.
          readInProgress = false;
          signalAsyncReadComplete();
        } finally {
          // Always release the lock, even if publishing the result above throws.
          stateChangeLock.unlock();
        }
        closeUnderlyingInputStreamIfNecessary();
      }
    }
  });
}

swapBuffers方法

/**
 * Exchanges the two buffer references: the read ahead buffer becomes the active buffer and the
 * previous active buffer becomes the read ahead buffer.
 */
private void swapBuffers() {
  final ByteBuffer previousActive = activeBuffer;
  activeBuffer = readAheadBuffer;
  readAheadBuffer = previousActive;
}

waitForAsyncReadComplete方法

while判断async read是否正在进行,如果是则阻塞等待直到async read完成,否则立即返回。

/**
 * Blocks while an async read is in progress and returns immediately otherwise. While waiting,
 * {@code isWaiting} is set so that the async read task can see that a reader is waiting.
 *
 * @throws InterruptedIOException if the thread is interrupted while waiting; the original
 *     {@link InterruptedException} is attached as the cause
 * @throws IOException if {@code checkReadException()} reports a failure
 */
private void waitForAsyncReadComplete() throws IOException {
  stateChangeLock.lock();
  // Announce that a reader is now waiting for data.
  isWaiting.set(true);
  try {
    // There is only one reader, and one writer, so the writer should signal only once,
    // but the wake-up condition is still re-checked in a loop to guard against spurious wakeups.
    while (readInProgress) {
      asyncReadComplete.await();
    }
  } catch (InterruptedException interrupted) {
    final InterruptedIOException wrapped =
        new InterruptedIOException(interrupted.getMessage());
    wrapped.initCause(interrupted);
    throw wrapped;
  } finally {
    isWaiting.set(false);
    stateChangeLock.unlock();
  }
  // Runs after the lock has been released.
  checkReadException();
}

 signalAsyncReadComplete方法

通知async read执行完成。

 /**
  * Wakes up every thread waiting on {@code asyncReadComplete}. The condition belongs to
  * {@code stateChangeLock}, so the lock is held while signalling.
  */
 private void signalAsyncReadComplete() {
   stateChangeLock.lock();
   try {
     asyncReadComplete.signalAll();
   } finally {
     stateChangeLock.unlock();
   }
 }

源码来自:ReadAheadInputStream

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值