HDFS replaceBlock方法

 这个方法是流式接口DataTransferProtocol中的方法,用于在Balancing和Storage Policy Satisfier (SPS)过程中替换原有的数据块。

整个调用过程如下:

 

 其中,Sender的调用用于发送请求但不要求返回值(方法返回void),DataXceiver的调用响应了Sender的请求。Sender的调用大同小异,这里主要分析DataXceiver的方法。

1.replaceBlock方法

下列描述中的源DN即为上图DatanodeB。

/**
 * Replaces a block on this datanode; per the article author's note this is
 * DataXceiver's implementation of the DataTransferProtocol operation.
 *
 * <p>Flow, as visible in this method:
 * <ol>
 *   <li>Run the access check and try to acquire the balance throttler. If the
 *       throttler cannot be acquired, an ERROR response is sent and the method
 *       returns without doing any work.</li>
 *   <li>If {@code proxySource} equals this datanode's id, the block is moved to
 *       another storage locally via {@code moveBlockAcrossStorage}.</li>
 *   <li>Otherwise a socket is opened to {@code proxySource}, a copyBlock
 *       request is sent to it, the block is received through a block receiver,
 *       and the namenode is notified of the received block together with
 *       {@code delHint}.</li>
 *   <li>In all cases the {@code finally} block releases the throttler, sends
 *       the final status to the peer and closes the streams.</li>
 * </ol>
 *
 * @param block       the block to replace
 * @param storageType storage type used for the access check and passed to the
 *                    local move / the block receiver
 * @param blockToken  token used for the access check and forwarded to the
 *                    proxy datanode in the copyBlock request
 * @param delHint     forwarded to the namenode with the received-block
 *                    notification; NOTE(review): presumably identifies the
 *                    datanode whose replica should be deleted - confirm
 * @param proxySource the datanode the block is copied from
 * @param storageId   storage id used for the access check and passed to the
 *                    local move / the block receiver
 * @throws IOException rethrown from the catch block after the error status and
 *                     message have been recorded; the response is still sent
 *                     by the finally block
 */
@Override
public void replaceBlock(final ExtendedBlock block,
    final StorageType storageType, 
    final Token<BlockTokenIdentifier> blockToken,
    final String delHint,
    final DatanodeInfo proxySource,
    final String storageId) throws IOException {
  updateCurrentThreadName("Replacing block " + block + " from " + delHint);
  // Output stream of this connection; handed to the access check and to the
  // block receiver. NOTE(review): presumably replies go to the requesting peer - confirm
  DataOutputStream replyOut = new DataOutputStream(getOutputStream());
  checkAccess(replyOut, true, block, blockToken,
      Op.REPLACE_BLOCK, BlockTokenIdentifier.AccessMode.REPLACE,
      new StorageType[]{storageType},
      new String[]{storageId});  // access check

  if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start
    String msg = "Not able to receive block " + block.getBlockId() +
        " from " + peer.getRemoteAddressString() + " because threads " +
        "quota is exceeded.";
    LOG.warn(msg);
    sendResponse(ERROR, msg);
    return;
  }

  Socket proxySock = null;
  DataOutputStream proxyOut = null;
  Status opStatus = SUCCESS;
  String errMsg = null;
  DataInputStream proxyReply = null;
  // True only while the copyBlock request is being sent; an IOException raised
  // in that window is not counted again by the catch block below.
  boolean IoeDuringCopyBlockOperation = false;
  try {
    // Move the block to different storage in the same datanode
    if (proxySource.equals(datanode.getDatanodeId())) {
      ReplicaInfo oldReplica = datanode.data.moveBlockAcrossStorage(block,
          storageType, storageId);
      if (oldReplica != null) {
        LOG.info("Moved {} from StorageType {} to {}",
            block, oldReplica.getVolume().getStorageType(), storageType);
      }
    } else {
      // Length is set to the server's estimated block size before the copy is
      // requested. NOTE(review): presumably a placeholder until the real
      // length is known - confirm
      block.setNumBytes(dataXceiverServer.estimateBlockSize);
      // get the output stream to the proxy
      final String dnAddr = proxySource.getXferAddr(connectToDnViaHostname);
      LOG.debug("Connecting to datanode {}", dnAddr);
      InetSocketAddress proxyAddr = NetUtils.createSocketAddr(dnAddr);
      proxySock = datanode.newSocket();
      NetUtils.connect(proxySock, proxyAddr, dnConf.socketTimeout);
      proxySock.setTcpNoDelay(dnConf.getDataTransferServerTcpNoDelay());
      proxySock.setSoTimeout(dnConf.socketTimeout);
      proxySock.setKeepAlive(true);

      OutputStream unbufProxyOut = NetUtils.getOutputStream(proxySock,
          dnConf.socketWriteTimeout);
      InputStream unbufProxyIn = NetUtils.getInputStream(proxySock);
      DataEncryptionKeyFactory keyFactory =
          datanode.getDataEncryptionKeyFactoryForBlock(block);
      // The raw socket streams are replaced by the pair returned from the
      // SASL client before any request is written.
      IOStreamPair saslStreams = datanode.saslClient.socketSend(proxySock,
          unbufProxyOut, unbufProxyIn, keyFactory, blockToken, proxySource);
      unbufProxyOut = saslStreams.out;
      unbufProxyIn = saslStreams.in;
      
      proxyOut = new DataOutputStream(new BufferedOutputStream(unbufProxyOut,
          smallBufferSize));  // output stream to the proxy datanode
      proxyReply = new DataInputStream(new BufferedInputStream(unbufProxyIn,
          ioFileBufferSize)); // input stream for data coming from the proxy datanode
      
      /* send request to the proxy */
      IoeDuringCopyBlockOperation = true;
      new Sender(proxyOut).copyBlock(block, blockToken);
      IoeDuringCopyBlockOperation = false;
      
      // receive the response from the proxy
      
      BlockOpResponseProto copyResponse = BlockOpResponseProto.parseFrom(
          PBHelperClient.vintPrefixed(proxyReply));

      String logInfo = "copy block " + block + " from "
          + proxySock.getRemoteSocketAddress();
      DataTransferProtoUtil.checkBlockOpStatus(copyResponse, logInfo, true);

      // get checksum info about the block we're copying
      ReadOpChecksumInfoProto checksumInfo = copyResponse.getReadOpChecksumInfo();
      DataChecksum remoteChecksum = DataTransferProtoUtil.fromProto(
          checksumInfo.getChecksum());
      // open a block receiver and check if the block does not exist
      setCurrentBlockReceiver(getBlockReceiver(block, storageType,
          proxyReply, proxySock.getRemoteSocketAddress().toString(),
          proxySock.getLocalSocketAddress().toString(),
          null, 0, 0, 0, "", null, datanode, remoteChecksum,
          CachingStrategy.newDropBehind(), false, false, storageId));
      
      // receive a block (author's note: this reuses the method used for
      // DN-to-DN data transfer when writing data)
      blockReceiver.receiveBlock(null, null, replyOut, null, 
          dataXceiverServer.balanceThrottler, null, true);
      
      // notify name node
      final Replica r = blockReceiver.getReplica();
      datanode.notifyNamenodeReceivedBlock(
          block, delHint, r.getStorageUuid(), r.isOnTransientStorage());
      
      LOG.info("Moved {} from {}, delHint={}",
          block, peer.getRemoteAddressString(), delHint);
    }
  } catch (IOException ioe) {
    // Record the failure status and message for the response sent in finally;
    // a pinned block gets its own status code.
    opStatus = ERROR;
    if (ioe instanceof BlockPinningException) {
      opStatus = Status.ERROR_BLOCK_PINNED;
    }
    errMsg = "opReplaceBlock " + block + " received exception " + ioe; 
    LOG.info(errMsg);
    if (!IoeDuringCopyBlockOperation) {
      // Don't double count IO errors
      incrDatanodeNetworkErrors();
    }
    throw ioe;
  } finally {
    // receive the last byte that indicates the proxy released its thread resource
    // (proxyReply stays null when the block was moved locally, so this is skipped)
    if (opStatus == SUCCESS && proxyReply != null) {
      try {
        proxyReply.readChar();
      } catch (IOException ignored) {
      }
    }
    
    // now release the thread resource
    dataXceiverServer.balanceThrottler.release();
    
    // send response back
    // (a failure to write the response is logged and counted, not rethrown)
    try {
      sendResponse(opStatus, errMsg);
    } catch (IOException ioe) {
      LOG.warn("Error writing reply back to {}",
          peer.getRemoteAddressString());
      incrDatanodeNetworkErrors();
    }
    IOUtils.closeStream(proxyOut);
    IOUtils.closeStream(blockReceiver);
    IOUtils.closeStream(proxyReply);
    IOUtils.closeStream(replyOut);
  }

  //update metrics
  // (not reached on the throttler-rejected early return or when the catch block rethrows)
  datanode.metrics.addReplaceBlockOp(elapsed());
}

2.copyBlock方法

copyBlock方法仅在replaceBlock方法中使用,可以说和replaceBlock方法绑定了。

注意,下列代码在源DN上执行,负责把数据块发送给调用replaceBlock方法的DN。

  /**
   * Serves a copyBlock request by sending the requested block to the peer.
   *
   * <p>After the access check the request is rejected with
   * {@code ERROR_BLOCK_PINNED} if the block is pinned, or with {@code ERROR}
   * if the balance throttler cannot be acquired. Otherwise a
   * {@code BlockSender} is created for the block, a success status with
   * checksum info is written, and the block content is sent, throttled by the
   * balance throttler. The {@code finally} block releases the throttler,
   * writes one trailing char on success and closes the reply stream and the
   * block sender.
   *
   * @param block      the block to copy to the peer
   * @param blockToken token used for the access check
   * @throws IOException rethrown after the error has been logged, counted and
   *                     passed to {@code datanode.handleBadBlock}
   */
  @Override
  public void copyBlock(final ExtendedBlock block,
      final Token<BlockTokenIdentifier> blockToken) throws IOException {
    updateCurrentThreadName("Copying block " + block);
    DataOutputStream reply = getBufferedOutputStream();  // output stream used to transfer the chunks
    checkAccess(reply, true, block, blockToken, Op.COPY_BLOCK,
        BlockTokenIdentifier.AccessMode.COPY);  // access check

    // A pinned block is refused before the throttler is acquired.
    if (datanode.data.getPinning(block)) {
      String msg = "Not able to copy block " + block.getBlockId() + " " +
          "to " + peer.getRemoteAddressString() + " because it's pinned ";
      LOG.info(msg);
      sendResponse(Status.ERROR_BLOCK_PINNED, msg);
      return;
    }
    
    if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start
      String msg = "Not able to copy block " + block.getBlockId() + " " +
          "to " + peer.getRemoteAddressString() + " because threads " +
          "quota is exceeded.";
      LOG.info(msg);
      sendResponse(ERROR, msg);
      return;
    }

    BlockSender blockSender = null;
    // Cleared in the catch block; controls whether the trailing byte is sent in finally.
    boolean isOpSuccess = true;

    try {
      // check if the block exists or not
      blockSender = new BlockSender(block, 0, -1, false, false, true, datanode, 
          null, CachingStrategy.newDropBehind());

      OutputStream baseStream = getOutputStream();

      // send status first
      writeSuccessWithChecksumInfo(blockSender, reply);

      long beginRead = Time.monotonicNow();
      // send block content to the target (author's note: this is where the block data is actually sent)
      long read = blockSender.sendBlock(reply, baseStream,
                                        dataXceiverServer.balanceThrottler);
      long duration = Time.monotonicNow() - beginRead;
      // NOTE(review): narrowing cast from long to int; a value above
      // Integer.MAX_VALUE would wrap - confirm against the metrics API
      datanode.metrics.incrBytesRead((int) read);
      datanode.metrics.incrBlocksRead();
      datanode.metrics.incrTotalReadTime(duration);
      
      LOG.info("Copied {} to {}", block, peer.getRemoteAddressString());
    } catch (IOException ioe) {
      isOpSuccess = false;
      LOG.info("opCopyBlock {} received exception {}", block, ioe.toString());
      incrDatanodeNetworkErrors();
      // Normally the client reports a bad block to the NN. However if the
      // meta file is corrupt or an disk error occurs (EIO), then the client
      // never gets a chance to do validation, and hence will never report
      // the block as bad. For some classes of IO exception, the DN should
      // report the block as bad, via the handleBadBlock() method
      datanode.handleBadBlock(block, ioe, false);
      throw ioe;
    } finally {
      // The throttler is released before the trailing byte is written.
      dataXceiverServer.balanceThrottler.release();
      if (isOpSuccess) {
        try {
          // send one last byte to indicate that the resource is cleaned.
          reply.writeChar('d');
        } catch (IOException ignored) {
        }
      }
      IOUtils.closeStream(reply);
      IOUtils.closeStream(blockSender);
    }

    //update metrics    
    // (not reached on the early returns above or when the catch block rethrows)
    datanode.metrics.addCopyBlockOp(elapsed());
  }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值