这个方法是流式接口DataTransferProtocol中的方法,用于在Balancing和Storage Policy Satisfier (SPS)过程中替换原有的数据块。
整个调用过程如下:
其中,Sender的调用用于发送请求但不要求返回值(方法返回void),DataXceiver的调用响应了Sender的请求。Sender的调用大同小异,这里主要分析DataXceiver的方法。
1.replaceBlock方法
下列描述中的源DN即为上图DatanodeB。
/**
 * DataXceiver's implementation of the DataTransferProtocol
 * {@code replaceBlock} operation.
 *
 * <p>If {@code proxySource} is this datanode, the block is moved to another
 * storage on the same node via {@code moveBlockAcrossStorage}. Otherwise this
 * node connects to {@code proxySource}, sends it a {@code copyBlock} request,
 * receives the block data, and then reports the received block to the
 * NameNode together with {@code delHint}.
 *
 * <p>A response carrying the final status is always attempted in the
 * {@code finally} block. If the balance throttler has no free slot, an
 * {@code ERROR} response is sent and the method returns without throwing.
 *
 * @param block the block to replace
 * @param storageType target storage type for the new replica
 * @param blockToken access token, checked for {@code REPLACE} access and
 *     forwarded to the proxy in the copy request
 * @param delHint hint passed to the NameNode with the received-block
 *     notification (presumably identifies the datanode whose old replica
 *     should be deleted; verify against the NameNode side)
 * @param proxySource datanode the block data is copied from
 * @param storageId target storage id for the new replica
 * @throws IOException if the move or copy fails; the failure is also reported
 *     in the response sent to the requester
 */
@Override
public void replaceBlock(final ExtendedBlock block,
    final StorageType storageType,
    final Token<BlockTokenIdentifier> blockToken,
    final String delHint,
    final DatanodeInfo proxySource,
    final String storageId) throws IOException {
  updateCurrentThreadName("Replacing block " + block + " from " + delHint);
  // Output stream of this connection; replies to the requester are written here.
  DataOutputStream replyOut = new DataOutputStream(getOutputStream());
  // Access check for the REPLACE operation on the target storage.
  checkAccess(replyOut, true, block, blockToken,
      Op.REPLACE_BLOCK, BlockTokenIdentifier.AccessMode.REPLACE,
      new StorageType[]{storageType},
      new String[]{storageId});

  if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start
    String msg = "Not able to receive block " + block.getBlockId() +
        " from " + peer.getRemoteAddressString() + " because threads " +
        "quota is exceeded.";
    LOG.warn(msg);
    sendResponse(ERROR, msg);
    return;
  }

  Socket proxySock = null;
  DataOutputStream proxyOut = null;
  Status opStatus = SUCCESS;
  String errMsg = null;
  DataInputStream proxyReply = null;
  // True only while the copyBlock request is being sent, so the catch block
  // can skip the network-error counter for failures of that call.
  boolean ioeDuringCopyBlockOperation = false;
  try {
    // Move the block to different storage in the same datanode
    if (proxySource.equals(datanode.getDatanodeId())) {
      ReplicaInfo oldReplica = datanode.data.moveBlockAcrossStorage(block,
          storageType, storageId);
      if (oldReplica != null) {
        LOG.info("Moved {} from StorageType {} to {}",
            block, oldReplica.getVolume().getStorageType(), storageType);
      }
    } else {
      // Seed the block length with the server's estimated block size.
      block.setNumBytes(dataXceiverServer.estimateBlockSize);
      // get the output stream to the proxy
      final String dnAddr = proxySource.getXferAddr(connectToDnViaHostname);
      LOG.debug("Connecting to datanode {}", dnAddr);
      InetSocketAddress proxyAddr = NetUtils.createSocketAddr(dnAddr);
      proxySock = datanode.newSocket();
      NetUtils.connect(proxySock, proxyAddr, dnConf.socketTimeout);
      proxySock.setTcpNoDelay(dnConf.getDataTransferServerTcpNoDelay());
      proxySock.setSoTimeout(dnConf.socketTimeout);
      proxySock.setKeepAlive(true);

      OutputStream unbufProxyOut = NetUtils.getOutputStream(proxySock,
          dnConf.socketWriteTimeout);
      InputStream unbufProxyIn = NetUtils.getInputStream(proxySock);
      DataEncryptionKeyFactory keyFactory =
          datanode.getDataEncryptionKeyFactoryForBlock(block);
      IOStreamPair saslStreams = datanode.saslClient.socketSend(proxySock,
          unbufProxyOut, unbufProxyIn, keyFactory, blockToken, proxySource);
      unbufProxyOut = saslStreams.out;
      unbufProxyIn = saslStreams.in;

      // Buffered stream for requests to the proxy.
      proxyOut = new DataOutputStream(new BufferedOutputStream(unbufProxyOut,
          smallBufferSize));
      // Buffered stream carrying the proxy's response and block data.
      proxyReply = new DataInputStream(new BufferedInputStream(unbufProxyIn,
          ioFileBufferSize));

      /* send request to the proxy */
      ioeDuringCopyBlockOperation = true;
      new Sender(proxyOut).copyBlock(block, blockToken);
      ioeDuringCopyBlockOperation = false;

      // receive the response from the proxy
      BlockOpResponseProto copyResponse = BlockOpResponseProto.parseFrom(
          PBHelperClient.vintPrefixed(proxyReply));

      String logInfo = "copy block " + block + " from "
          + proxySock.getRemoteSocketAddress();
      DataTransferProtoUtil.checkBlockOpStatus(copyResponse, logInfo, true);

      // get checksum info about the block we're copying
      ReadOpChecksumInfoProto checksumInfo = copyResponse.getReadOpChecksumInfo();
      DataChecksum remoteChecksum = DataTransferProtoUtil.fromProto(
          checksumInfo.getChecksum());
      // open a block receiver and check if the block does not exist
      setCurrentBlockReceiver(getBlockReceiver(block, storageType,
          proxyReply, proxySock.getRemoteSocketAddress().toString(),
          proxySock.getLocalSocketAddress().toString(),
          null, 0, 0, 0, "", null, datanode, remoteChecksum,
          CachingStrategy.newDropBehind(), false, false, storageId));

      // receive a block; this reuses the receive path that datanodes use
      // when transferring data to each other during writes
      blockReceiver.receiveBlock(null, null, replyOut, null,
          dataXceiverServer.balanceThrottler, null, true);

      // notify name node
      final Replica r = blockReceiver.getReplica();
      datanode.notifyNamenodeReceivedBlock(
          block, delHint, r.getStorageUuid(), r.isOnTransientStorage());

      LOG.info("Moved {} from {}, delHint={}",
          block, peer.getRemoteAddressString(), delHint);
    }
  } catch (IOException ioe) {
    opStatus = ERROR;
    if (ioe instanceof BlockPinningException) {
      opStatus = Status.ERROR_BLOCK_PINNED;
    }
    errMsg = "opReplaceBlock " + block + " received exception " + ioe;
    LOG.info(errMsg);
    if (!ioeDuringCopyBlockOperation) {
      // Don't double count IO errors
      incrDatanodeNetworkErrors();
    }
    throw ioe;
  } finally {
    // NOTE(review): opStatus only changes in the IOException handler, so an
    // unchecked exception reaches this point with opStatus == SUCCESS.
    // receive the last byte that indicates the proxy released its thread resource
    if (opStatus == SUCCESS && proxyReply != null) {
      try {
        proxyReply.readChar();
      } catch (IOException ignored) {
        // Best effort: the transfer itself has already completed.
      }
    }

    // now release the thread resource
    dataXceiverServer.balanceThrottler.release();

    // send response back
    try {
      sendResponse(opStatus, errMsg);
    } catch (IOException ioe) {
      LOG.warn("Error writing reply back to {}",
          peer.getRemoteAddressString());
      incrDatanodeNetworkErrors();
    }
    IOUtils.closeStream(proxyOut);
    IOUtils.closeStream(blockReceiver);
    IOUtils.closeStream(proxyReply);
    IOUtils.closeStream(replyOut);
    // The socket must be closed explicitly: if connect or the SASL handshake
    // failed, the proxy streams were never created and nothing above would
    // release it. closeSocket is a no-op for null (same-node move).
    IOUtils.closeSocket(proxySock);
  }

  //update metrics
  datanode.metrics.addReplaceBlockOp(elapsed());
}
2.copyBlock方法
copyBlock方法仅在replaceBlock方法中使用,可以说和replaceBlock方法绑定了。
注意,下列代码是在源DN上执行的,用于把数据块发送回调用replaceBlock方法的DN。
/**
 * Handles a {@code copyBlock} request by sending the requested block back to
 * the peer over this connection.
 *
 * <p>The request is refused with a response (and no exception) when the block
 * is pinned or when the balance throttler has no free slot. After a send that
 * did not raise an {@link IOException}, one extra character is written once
 * the throttler slot has been released.
 *
 * @param block the block to send
 * @param blockToken access token, checked for {@code COPY} access
 * @throws IOException if creating the sender or sending the block fails; the
 *     block is also handed to {@code datanode.handleBadBlock} in that case
 */
@Override
public void copyBlock(final ExtendedBlock block,
    final Token<BlockTokenIdentifier> blockToken) throws IOException {
  updateCurrentThreadName("Copying block " + block);
  // Buffered stream to the peer; the status and block data are written to it.
  final DataOutputStream replyStream = getBufferedOutputStream();
  checkAccess(replyStream, true, block, blockToken, Op.COPY_BLOCK,
      BlockTokenIdentifier.AccessMode.COPY);

  // Pinned blocks are refused.
  if (datanode.data.getPinning(block)) {
    final String pinnedMsg = "Not able to copy block " + block.getBlockId()
        + " to " + peer.getRemoteAddressString() + " because it's pinned ";
    LOG.info(pinnedMsg);
    sendResponse(Status.ERROR_BLOCK_PINNED, pinnedMsg);
    return;
  }

  // No throttler slot available: refuse instead of waiting.
  final boolean slotAcquired = dataXceiverServer.balanceThrottler.acquire();
  if (!slotAcquired) {
    final String quotaMsg = "Not able to copy block " + block.getBlockId()
        + " to " + peer.getRemoteAddressString()
        + " because threads quota is exceeded.";
    LOG.info(quotaMsg);
    sendResponse(ERROR, quotaMsg);
    return;
  }

  BlockSender sender = null;
  boolean copyFailed = false;
  try {
    // Constructing the sender also checks whether the block exists.
    sender = new BlockSender(block, 0, -1, false, false, true, datanode,
        null, CachingStrategy.newDropBehind());
    final OutputStream rawOut = getOutputStream();

    // The status and checksum info go out before any block data.
    writeSuccessWithChecksumInfo(sender, replyStream);

    final long startedAt = Time.monotonicNow();
    // This call sends the block content to the target.
    final long bytesSent = sender.sendBlock(replyStream, rawOut,
        dataXceiverServer.balanceThrottler);
    final long elapsedMs = Time.monotonicNow() - startedAt;

    datanode.metrics.incrBytesRead((int) bytesSent);
    datanode.metrics.incrBlocksRead();
    datanode.metrics.incrTotalReadTime(elapsedMs);

    LOG.info("Copied {} to {}", block, peer.getRemoteAddressString());
  } catch (IOException ioe) {
    copyFailed = true;
    LOG.info("opCopyBlock {} received exception {}", block, ioe.toString());
    incrDatanodeNetworkErrors();
    // Clients normally report bad blocks to the NN, but with a corrupt meta
    // file or a disk error (EIO) the client never gets to validate the data
    // and so never reports it. For some classes of IO exception the DN
    // reports the block itself through handleBadBlock().
    datanode.handleBadBlock(block, ioe, false);
    throw ioe;
  } finally {
    dataXceiverServer.balanceThrottler.release();
    if (!copyFailed) {
      try {
        // One last byte tells the peer that the resource has been cleaned up.
        replyStream.writeChar('d');
      } catch (IOException ignored) {
        // Best effort only.
      }
    }
    IOUtils.closeStream(replyStream);
    IOUtils.closeStream(sender);
  }

  // Update metrics.
  datanode.metrics.addCopyBlockOp(elapsed());
}