offerService 包含了 DN 主循环线程的核心功能。DN 对其提供保护:如果该函数执行失败,DN 线程会 sleep 5 秒后重新执行,这一逻辑可以在 DN 的线程体中看到。offerService 中的部分功能(比如心跳、异步块报告)在以前的文章里已经描述过,但都是单独描述;它们之间的协调(例如执行的先后顺序、执行间隔)则可以在 offerService 中体现出来。
/**
 * Main service loop of the DataNode.
 *
 * <p>While {@code shouldRun} is set, every pass of the loop performs, in order:
 * <ol>
 *   <li>a heartbeat to the NameNode, once more than {@code heartBeatInterval}
 *       has elapsed since the previous one, followed by processing of the
 *       returned commands (a {@code false} result restarts the loop
 *       immediately);</li>
 *   <li>a {@code blockReceived} call for the blocks queued in
 *       {@code receivedBlockList} together with their entries in
 *       {@code delHints}, after which the reported entries are removed from
 *       both collections;</li>
 *   <li>a block report, once more than {@code blockReportInterval} has elapsed
 *       since {@code lastBlockReport}: sent if the asynchronous report is
 *       ready, otherwise a new asynchronous report is requested;</li>
 *   <li>start-up of the block scanner daemon if a scanner exists, has no
 *       thread yet, and the upgrade manager reports the upgrade complete;</li>
 *   <li>a wait on {@code receivedBlockList} until the next heartbeat is due,
 *       skipped when received blocks are still queued.</li>
 * </ol>
 *
 * <p>A {@code RemoteException} wrapping {@code UnregisteredDatanodeException},
 * {@code DisallowedDatanodeException} or {@code IncorrectVersionException}
 * shuts the DataNode down and returns. Any other {@code RemoteException} or
 * {@code IOException} is logged and the loop carries on.
 *
 * @throws Exception any exception raised inside the loop that is not handled
 *         by the catch clauses described above
 */
public void offerService() throws Exception {
  LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec"
      + " Initial delay: " + initialBlockReportDelay + "msec");

  // Keep serving until the DataNode is told to stop.
  while (shouldRun) {
    try {
      // Timestamp of this pass; all "is it due yet?" checks are relative to it.
      final long loopStart = now();

      // ---- Heartbeat ------------------------------------------------------
      final boolean heartbeatDue = loopStart - lastHeartbeat > heartBeatInterval;
      if (heartbeatDue) {
        // The timestamp is advanced before the RPC is issued.
        lastHeartbeat = loopStart;
        DatanodeCommand[] heartbeatCmds = namenode.sendHeartbeat(
            dnRegistration,
            data.getCapacity(),
            data.getDfsUsed(),
            data.getRemaining(),
            xmitsInProgress.get(),
            getXceiverCount());
        myMetrics.addHeartBeat(now() - loopStart);
        // Act on whatever the NameNode sent back; a false result skips the
        // remainder of this pass.
        if (!processCommand(heartbeatCmds)) {
          continue;
        }
      }

      // ---- Newly received blocks -----------------------------------------
      // Snapshot the pending blocks and their deletion hints under both locks.
      Block[] pendingBlocks = null;
      String[] pendingHints = null;
      synchronized (receivedBlockList) {
        synchronized (delHints) {
          final int pendingCount = receivedBlockList.size();
          if (pendingCount > 0) {
            if (pendingCount != delHints.size()) {
              LOG.warn("Panic: receiveBlockList and delHints are not of the same length" );
            }
            pendingBlocks = receivedBlockList.toArray(new Block[pendingCount]);
            pendingHints = delHints.toArray(new String[pendingCount]);
          }
        }
      }

      if (pendingBlocks != null) {
        if (pendingHints == null || pendingHints.length != pendingBlocks.length) {
          LOG.warn("Panic: block array & delHintArray are not the same" );
        }
        // The RPC is made outside the locks; the local lists are only trimmed
        // after it has returned without throwing.
        namenode.blockReceived(dnRegistration, pendingBlocks, pendingHints);
        synchronized (receivedBlockList) {
          synchronized (delHints) {
            for (int idx = 0; idx < pendingBlocks.length; idx++) {
              receivedBlockList.remove(pendingBlocks[idx]);
              delHints.remove(pendingHints[idx]);
            }
          }
        }
      }

      // ---- Block report ---------------------------------------------------
      final boolean blockReportDue = loopStart - lastBlockReport > blockReportInterval;
      if (blockReportDue) {
        if (!data.isAsyncBlockReportReady()) {
          // The report is due but the asynchronous scan has not produced it
          // yet: ask for one and log how long we have been overdue.
          data.requestAsyncBlockReport();
          if (lastBlockReport > 0) { // not the very first report
            long overdueMillis = loopStart - lastBlockReport - blockReportInterval;
            String lateMsg = "Block report is due, and been waiting for it for "
                + (overdueMillis / 1000) + " seconds...";
            if (overdueMillis > LATE_BLOCK_REPORT_WARN_THRESHOLD) {
              LOG.warn(lateMsg);
            } else if (overdueMillis > LATE_BLOCK_REPORT_INFO_THRESHOLD) {
              LOG.info(lateMsg);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug(lateMsg);
            }
          }
        } else {
          // Fetch the prepared report, then ship it to the NameNode.
          long createStart = now();
          Block[] report = data.retrieveAsyncBlockReport();
          long sendStart = now();
          DatanodeCommand reportCmd = namenode.blockReport(
              dnRegistration, BlockListAsLongs.convertToArrayLongs(report));

          // Record how long retrieval and the RPC each took.
          long sendMillis = now() - sendStart;
          long createMillis = sendStart - createStart;
          myMetrics.addBlockReport(sendMillis);
          LOG.info("BlockReport of " + report.length
              + " blocks took " + createMillis + " msec to generate and "
              + sendMillis + " msecs for RPC and NN processing");

          // Decide the reference time for the next report.
          if (resetBlockReportTime) {
            // Back-date by a random offset within one interval.
            lastBlockReport = loopStart - R.nextInt((int)(blockReportInterval));
            resetBlockReportTime = false;
          } else {
            /* Advance by a whole number of intervals so reports stay aligned.
             * Say the last block report was at 8:20:14; the current one should
             * have started around 9:20:14 (default 1 hour interval).
             * If the current time is:
             *   1) normal, like 9:20:18, the next report is at 10:20:14
             *   2) unexpected, like 11:35:43, the next report is at 12:20:14
             */
            lastBlockReport +=
                (now() - lastBlockReport) / blockReportInterval * blockReportInterval;
          }

          // Handle the command returned with the report response.
          processCommand(reportCmd);
        }
      }

      // ---- Block scanner --------------------------------------------------
      // Started at most once (the thread field is set on start).
      // See: http://blog.csdn.net/lihm0_1/article/details/12437099
      final boolean scannerNotStarted = blockScanner != null && blockScannerThread == null;
      if (scannerNotStarted && upgradeManager.isUpgradeCompleted()) {
        LOG.info("Starting Periodic block scanner.");
        blockScannerThread = new Daemon(blockScanner);
        blockScannerThread.start();
      }

      // ---- Idle wait ------------------------------------------------------
      // Time left until the next heartbeat. The wait only happens when no
      // received blocks are queued; otherwise the loop goes straight round.
      long idleMillis = heartBeatInterval - (System.currentTimeMillis() - lastHeartbeat);
      synchronized (receivedBlockList) {
        if (idleMillis > 0 && receivedBlockList.size() == 0) {
          try {
            receivedBlockList.wait(idleMillis);
          } catch (InterruptedException ignored) {
            // Interruption is not acted upon here; execution simply continues.
          }
          delayBeforeBlockReceived();
        }
      }
    } catch (RemoteException remoteEx) {
      String remoteClassName = remoteEx.getClassName();
      // These three remote failures end the service loop for good.
      boolean fatal =
          UnregisteredDatanodeException.class.getName().equals(remoteClassName)
          || DisallowedDatanodeException.class.getName().equals(remoteClassName)
          || IncorrectVersionException.class.getName().equals(remoteClassName);
      if (fatal) {
        LOG.warn("DataNode is shutting down: "
            + StringUtils.stringifyException(remoteEx));
        shutdown();
        return;
      }
      LOG.warn(StringUtils.stringifyException(remoteEx));
    } catch (IOException ioe) {
      LOG.warn(StringUtils.stringifyException(ioe));
    }
  } // while (shouldRun)
} // offerService