HDFS Hedged Read代码分析-CSDN博客

本文链接：https://blog.csdn.net/Gloria_y/article/details/103992548

背景：

Hedged Read参数：

1.dfs.client.hedged.read.threadpool.size 并发Hedged Read的线程池大小

2.dfs.client.hedged.read.threshold.millis 开启一个Hedged Read之前的等待时间（毫秒）

hadoop2.7.2 DFSInputStream类的 hedgedFetchBlockByteRange方法

简要说明：

hedgedFetchBlockByteRange方法通过一个while循环来启动多次读取；

第一次循环发起读取请求后，如果在超时时间内获取到数据，就直接正常返回；

如果第一次请求在超时时间内没有获取到数据，就进入第二次循环，开始真正的Hedged Read。

这一次submit新的getFromDataNodeCallable之后，会执行getFirstToComplete，看第一次请求和新提交的请求哪个先返回结果，然后通过cancelAll取消其余的任务。

getFirstToComplete 里面执行的是hedgedService.take();

  /**
   * Like {@link #fetchBlockByteRange(LocatedBlock, long, long, byte[],
   * int, Map)} except we start up a second, parallel, 'hedged' read
   * if the first read is taking longer than configured amount of
   * time.  We then wait on which ever read returns first.
   *
   * <p>The first read writes straight into {@code buf}; every hedged read
   * writes into a freshly allocated buffer whose content is copied into
   * {@code buf} only if that hedged read is the one that finishes first.
   *
   * <p>Note: this differs from the Hadoop 2.7.2 original in one place. When
   * the first read completes within the threshold but fails, its future is
   * now removed from the outstanding list and its node is ignored on later
   * attempts. Previously the consumed future stayed in the list, so a later
   * {@code getFirstToComplete} call could block forever on a completion
   * queue that would never receive another entry (see HDFS-11303).
   *
   * @param block the block to read from; re-resolved through
   *        {@code getBlockAt} before the first attempt
   * @param start first byte to read (inclusive)
   * @param end last byte to read (inclusive); the length read is
   *        {@code end - start + 1}
   * @param buf destination array
   * @param offset index in {@code buf} at which the data is placed
   * @param corruptedBlockMap passed through unchanged to every per-datanode
   *        read task
   * @throws IOException propagated from {@code getBlockAt} or from
   *         {@code chooseDataNode} when picking the node for a first read
   */
  private void hedgedFetchBlockByteRange(LocatedBlock block, long start,
      long end, byte[] buf, int offset,
      Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap)
      throws IOException {
    // Reads that have been submitted and whose result has not been consumed.
    ArrayList<Future<ByteBuffer>> futures = new ArrayList<Future<ByteBuffer>>();
    CompletionService<ByteBuffer> hedgedService =
        new ExecutorCompletionService<ByteBuffer>(
        dfsClient.getHedgedReadsThreadPool());
    // Datanodes that must not be chosen again by later attempts.
    ArrayList<DatanodeInfo> ignored = new ArrayList<DatanodeInfo>();
    ByteBuffer bb = null;
    int len = (int) (end - start + 1);
    int hedgedReadId = 0;
    block = getBlockAt(block.getStartOffset());

    // Core loop: each iteration either issues the first read or adds one
    // more hedged read and waits for whichever outstanding read finishes.
    while (true) {
      // see HDFS-6591, this metric is used to verify/catch unnecessary loops
      hedgedReadOpsLoopNumForTesting++;
      DNAddrPair chosenNode = null;

      // there is no request already executing.
      // An empty future list means nothing is in flight, so issue the
      // first (non-hedged) read.
      if (futures.isEmpty()) {
        // chooseDataNode is a commitment. If no node, we go to
        // the NN to reget block locations. Only go here on first read.
        chosenNode = chooseDataNode(block, ignored);
        bb = ByteBuffer.wrap(buf, offset, len);
        Callable<ByteBuffer> getFromDataNodeCallable = getFromOneDataNode(
            chosenNode, block, start, end, bb, corruptedBlockMap,
            hedgedReadId++);
        Future<ByteBuffer> firstRequest = hedgedService
            .submit(getFromDataNodeCallable);
        futures.add(firstRequest);
        // Declared outside the try so the failure handler can drop it.
        Future<ByteBuffer> future = null;
        try {
          // Wait up to the hedged-read threshold for the first read.
          future = hedgedService.poll(
              dfsClient.getHedgedReadTimeout(), TimeUnit.MILLISECONDS);
          // The first read finished in time; get() surfaces any failure.
          if (future != null) {
            future.get();
            return;
          }

          // The first read is still running after the threshold.
          if (DFSClient.LOG.isDebugEnabled()) {
            DFSClient.LOG.debug("Waited " + dfsClient.getHedgedReadTimeout()
                + "ms to read from " + chosenNode.info
                + "; spawning hedged read");
          }
          // Ignore this node on next go around.
          ignored.add(chosenNode.info);
          dfsClient.getHedgedReadMetrics().incHedgedReadOps();
          // Loop again; the non-empty future list routes us to the hedged
          // branch below.
          continue; // no need to refresh block locations
        } catch (InterruptedException e) {
          // Ignore
        } catch (ExecutionException e) {
          // Ignore already logged in the call.
          // The failed future has already been taken off the completion
          // queue, so it must leave the outstanding list as well; otherwise
          // a later take() may wait forever for a result that never comes.
          futures.remove(future);
          // Do not pick the node that just failed again.
          ignored.add(chosenNode.info);
        }
      } else {
        // We are starting up a 'hedged' read. We have a read already
        // ongoing. Call getBestNodeDNAddrPair instead of chooseDataNode.
        // If no nodes to do hedged reads against, pass.
        try {
          try {
            chosenNode = getBestNodeDNAddrPair(block, ignored);
          } catch (IOException ioe) {
            chosenNode = chooseDataNode(block, ignored);
          }
          // A hedged read gets its own buffer so that it never races with
          // the first read, which writes directly into buf.
          bb = ByteBuffer.allocate(len);
          Callable<ByteBuffer> getFromDataNodeCallable = getFromOneDataNode(
              chosenNode, block, start, end, bb, corruptedBlockMap,
              hedgedReadId++);
          // Submit the additional (hedged) read.
          Future<ByteBuffer> oneMoreRequest = hedgedService
              .submit(getFromDataNodeCallable);
          futures.add(oneMoreRequest);
        } catch (IOException ioe) {
          if (DFSClient.LOG.isDebugEnabled()) {
            DFSClient.LOG.debug("Failed getting node for hedged read: "
                + ioe.getMessage());
          }
        }
        // if not succeeded. Submit callables for each datanode in a loop, wait
        // for a fixed interval and get the result from the fastest one.
        // Block until any outstanding read completes.
        try {
          ByteBuffer result = getFirstToComplete(hedgedService, futures);
          // cancel the rest.
          cancelAll(futures);
          if (result.array() != buf) { // compare the array pointers
            // A hedged read won: its data lives in a private buffer and
            // still has to be copied into the caller's array.
            dfsClient.getHedgedReadMetrics().incHedgedReadWins();
            System.arraycopy(result.array(), result.position(), buf, offset,
                len);
          } else {
            dfsClient.getHedgedReadMetrics().incHedgedReadOps();
          }
          return;
        } catch (InterruptedException ie) {
          // Ignore and retry
        }
        // We got here if exception. Ignore this node on next go around IFF
        // we found a chosenNode to hedge read against.
        if (chosenNode != null && chosenNode.info != null) {
          ignored.add(chosenNode.info);
        }
      }
    }
  }

  /**
   * Blocks until one of the submitted reads completes and returns its
   * buffer, removing the corresponding future from {@code futures}.
   *
   * <p>{@link InterruptedException} doubles as the "retry" signal here: it
   * is thrown when there is nothing to wait for, and when the read that
   * completed had failed or been cancelled.
   *
   * @param hedgedService completion service the reads were submitted to
   * @param futures outstanding reads; the completed one is removed from it
   * @return the buffer produced by the first read to complete successfully
   * @throws InterruptedException if {@code futures} is empty, if the
   *         completed read failed or was cancelled, or if the wait itself
   *         was interrupted
   */
  private ByteBuffer getFirstToComplete(
      CompletionService<ByteBuffer> hedgedService,
      ArrayList<Future<ByteBuffer>> futures) throws InterruptedException {
    if (futures.isEmpty()) {
      throw new InterruptedException("let's retry");
    }

    // Blocks on the completion queue until some read has finished.
    final Future<ByteBuffer> completed = hedgedService.take();
    final ByteBuffer data;
    try {
      data = completed.get();
    } catch (ExecutionException failed) {
      // already logged in the Callable
      futures.remove(completed);
      throw new InterruptedException("let's retry");
    } catch (CancellationException cancelled) {
      // already logged in the Callable
      futures.remove(completed);
      throw new InterruptedException("let's retry");
    }

    futures.remove(completed);
    return data;
  }