hadoop2.7.0源码之DataNode启动流程

本文将结合hadoop2.7.0版本的源码与UML图对DataNode的初始化流程进行深入剖析,旨在更深入地理解DataNode初始化的整体逻辑

第一步:查看DataNode的入口方法main()

/**
 * Command-line entry point of the DataNode.
 * When {@code DFSUtil.parseHelpArgument} reports true for the given
 * arguments the JVM exits with status 0; otherwise control is handed to
 * {@code secureMain} with no secure resources.
 *
 * @param args command-line arguments
 */
public static void main(String args[]) {
  final boolean helpHandled =
      DFSUtil.parseHelpArgument(args, DataNode.USAGE, System.out, true);
  if (helpHandled) {
    System.exit(0);
  }
  // Continue with the regular startup path.
  secureMain(args, null);
}

第二、三步:调用DataNode的createDataNode()方法

/**
 * Creates a DataNode from the command-line arguments and then joins it.
 * If createDataNode returns null, the error code is set to 1.
 * The rest of the method (after the if/else) is elided in this excerpt.
 *
 * @param args      command-line arguments
 * @param resources secure resources forwarded to createDataNode
 */
public static void secureMain(String args[], SecureResources resources) {
  int errorCode = 0;
  try {
    StringUtils.startupShutdownMessage(DataNode.class, args, LOG);
    // Initialize the DataNode (a null Configuration is passed here).
    DataNode datanode = createDataNode(args, null, resources);
    if (datanode != null) {
      // Block on the DataNode (per the article, this call blocks).
      datanode.join();
    } else {
      errorCode = 1;
    }
  ...
}

第四步:调用instantiateDataNode()方法

/**
 * Instantiates a DataNode and, if one was produced, starts its daemon
 * via {@code runDatanodeDaemon()}.
 *
 * @param args      command-line arguments
 * @param conf      configuration forwarded to instantiateDataNode
 * @param resources secure resources forwarded to instantiateDataNode
 * @return the DataNode, or null when instantiateDataNode returned null
 * @throws IOException propagated from instantiation or daemon startup
 */
public static DataNode createDataNode(String args[], Configuration conf,
    SecureResources resources) throws IOException {
  final DataNode instance = instantiateDataNode(args, conf, resources);
  if (instance == null) {
    // Nothing was instantiated, so there is nothing to start.
    return null;
  }
  // Kick off the DataNode's background daemon.
  instance.runDatanodeDaemon();
  return instance;
}

第五步:调用makeInstance()方法

/**
 * Builds a DataNode by delegating to makeInstance.
 * The preceding part of the method (including where dataLocations comes
 * from) is elided in this excerpt.
 */
public static DataNode instantiateDataNode(String args [], Configuration conf,
    SecureResources resources) throws IOException {
  ...
  // Key step: delegate the actual construction to makeInstance.
  return makeInstance(dataLocations, conf, resources);
}

第六步:通过new实例化DataNode

/**
 * Constructs the DataNode with {@code new}.
 * The code that derives {@code locations} is elided in this excerpt.
 */
static DataNode makeInstance(Collection<StorageLocation> dataDirs,
    Configuration conf, SecureResources resources) throws IOException {
  ...
  // Instantiate the DataNode.
  return new DataNode(conf, locations, resources);
}

第七步:在DataNode构造方法内调用startDataNode()启动DataNode

/**
 * DataNode constructor (excerpt; leading and trailing parts elided).
 * Resolves the host name, then calls startDataNode. If that throws an
 * IOException, shutdown() is invoked and the exception is rethrown.
 */
DataNode(final Configuration conf,
         final List<StorageLocation> dataDirs,
         final SecureResources resources) throws IOException {
  ...
  try {
    hostName = getHostName(conf);
    LOG.info("Configured hostname is " + hostName);
    // Start the DataNode.
    startDataNode(conf, dataDirs, resources);
  } catch (IOException ie) {
    // Clean up before propagating the startup failure.
    shutdown();
    throw ie;
  }
  ...
}

第八步:在DataNode的startDataNode()方法内主要对5个重要组件进行初始化,分别为DataXceiver、HttpServer、RpcServer、BlockPoolManager、心跳机制

/**
 * Brings up the DataNode's main components (excerpt; parts elided).
 * In the visible portion it, in order: creates the DataStorage, registers
 * the MXBean, initializes the DataXceiver, starts the info (HTTP) server,
 * starts the JVM pause monitor, creates the block pool token secret
 * manager, initializes the IPC server, creates the metrics, and finally
 * creates the BlockPoolManager and refreshes its namenodes.
 */
void startDataNode(Configuration conf, 
                   List<StorageLocation> dataDirs,
                   SecureResources resources
                   ) throws IOException {
  ...
  storage = new DataStorage();
  
  // global DN settings
  registerMXBean();
  // Initialize the DataXceiver.
  initDataXceiver(conf);
  // Start the HttpServer service.
  startInfoServer(conf);
  pauseMonitor = new JvmPauseMonitor(conf);
  pauseMonitor.start();
  // BlockPoolTokenSecretManager is required to create ipc server.
  this.blockPoolTokenSecretManager = new BlockPoolTokenSecretManager();
  // Login is done by now. Set the DN user name.
  dnUserName = UserGroupInformation.getCurrentUser().getShortUserName();
  LOG.info("dnUserName = " + dnUserName);
  LOG.info("supergroup = " + supergroup);
  // Start the RPC server.
  initIpcServer(conf);
  metrics = DataNodeMetrics.create(conf, getDisplayName());
  metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
  // Create the BlockPoolManager.
  // Author's note: a single (non-federated) cluster has one BlockPool.
  // With federation there are several NameNodes and therefore several
  // BlockPools, one per federated nameservice, e.g.:
  //   nameservice 1: hadoop1 (Active) / hadoop2 (Standby) share one BlockPool
  //   nameservice 2: hadoop3 (Active) / hadoop4 (Standby) share one BlockPool
  blockPoolManager = new BlockPoolManager(this);
  // This is where the heartbeat mechanism gets set up (per the article).
  blockPoolManager.refreshNamenodes(conf);
  ...
}

第九步:先查看initDataXceiver()如何初始化DataXceiver,方法中主要是实例化DataXceiverServer,该类实现了Runnable接口

/**
 * Creates the DataXceiverServer and wraps it in a Daemon thread
 * (excerpt; the creation of tcpPeerServer and the tail are elided).
 */
private void initDataXceiver(Configuration conf) throws IOException {
  // (earlier part of the method omitted by the article)
  // Instantiate a DataXceiverServer.
  // Per the article, this is the service the DataNode uses to receive data
  // sent by clients and by other DataNodes.
  xserver = new DataXceiverServer(tcpPeerServer, conf, this);
  // Wrap the server in a Daemon (daemon thread) belonging to threadGroup.
  this.dataXceiverServer = new Daemon(threadGroup, xserver);
  this.threadGroup.setDaemon(true); // auto destroy when empty
  ...
}

第十、十一、十二步:调用DataNode的startInfoServer()方法启动HttpServer2服务,方法内实例化了HttpServer2并启动,用于处理访问该DataNode的http请求

/**
 * Builds and starts the DataNode's HttpServer2 instance (excerpt; parts
 * elided). Registers the streamFile, getFileChecksum and
 * blockScannerReport servlets and exposes this DataNode and the
 * configuration as server attributes before starting the server.
 */
private void startInfoServer(Configuration conf)
  throws IOException {
  ...
  // Build the HttpServer2 that will receive HTTP requests.
  HttpServer2.Builder builder = new HttpServer2.Builder()
    .setName("datanode")
    .setConf(conf).setACL(new AccessControlList(conf.get(DFS_ADMIN, " ")))
    .addEndpoint(URI.create("http://localhost:0"))
    .setFindPort(true);
  this.infoServer = builder.build();
  // Bind several servlets to this HTTP server.
  this.infoServer.addInternalServlet(null, "/streamFile/*", StreamFile.class);
  this.infoServer.addInternalServlet(null, "/getFileChecksum/*",
      FileChecksumServlets.GetServlet.class);
  
  this.infoServer.setAttribute("datanode", this);
  this.infoServer.setAttribute(JspHelper.CURRENT_CONF, conf);
  this.infoServer.addServlet(null, "/blockScannerReport",
                             BlockScanner.Servlet.class);
  // Start the HTTP service.
  this.infoServer.start();
  ...
}

第十三、十四步:调用DataNode的initIpcServer()方法启动RPC服务,方法主要是实例化ipcServer

/**
 * Creates the DataNode's IPC (RPC) server (excerpt; parts elided,
 * including where {@code service} is built).
 * The bind address and port come from DFS_DATANODE_IPC_ADDRESS_KEY and the
 * handler count from DFS_DATANODE_HANDLER_COUNT_KEY.
 */
private void initIpcServer(Configuration conf) throws IOException {
  InetSocketAddress ipcAddr = NetUtils.createSocketAddr(
      conf.getTrimmed(DFS_DATANODE_IPC_ADDRESS_KEY));
  
  ...
  // Create the server side of the RPC.
  ipcServer = new RPC.Builder(conf)
      .setProtocol(ClientDatanodeProtocolPB.class)
      .setInstance(service)
      .setBindAddress(ipcAddr.getHostName())
      .setPort(ipcAddr.getPort())
      .setNumHandlers(
          conf.getInt(DFS_DATANODE_HANDLER_COUNT_KEY,
              DFS_DATANODE_HANDLER_COUNT_DEFAULT)).setVerbose(false)
      .setSecretManager(blockPoolTokenSecretManager).build();
  
 ...
}

第十五步:通过new实例化BlockPoolManager,并调用其refreshNamenodes()方法

// Create the BlockPoolManager.
// Author's note: a single (non-federated) cluster has one BlockPool.
// With federation there are several NameNodes and therefore several
// BlockPools, one per federated nameservice, e.g.:
//   nameservice 1: hadoop1 (Active) / hadoop2 (Standby) share one BlockPool
//   nameservice 2: hadoop3 (Active) / hadoop4 (Standby) share one BlockPool
blockPoolManager = new BlockPoolManager(this);

// This is where the heartbeat mechanism gets set up (per the article).
blockPoolManager.refreshNamenodes(conf);

第十六步:调用BlockPoolManager的refreshNamenodes()方法,该方法主要是向NameNode注册DataNode以及保持心跳机制

/**
 * Refreshes the set of namenodes this manager talks to (excerpt; the code
 * that builds {@code newAddressMap} is elided).
 * The actual work is done by doRefreshNamenodes while holding
 * refreshNamenodesLock.
 */
void refreshNamenodes(Configuration conf)
    throws IOException {
  ...
  synchronized (refreshNamenodesLock) {
    // Delegate to doRefreshNamenodes under the refresh lock.
    doRefreshNamenodes(newAddressMap);
  }
}

第十七、十八步:遍历所有的nameservices,调用startAll()方法启动服务

/**
 * Reconciles the known nameservices with the given address map (excerpt;
 * the article cuts the method off after startAll()).
 * Must be called with refreshNamenodesLock held. Classifies each
 * nameservice as to-refresh, to-add or to-remove, creates a BPOfferService
 * for every added nameservice, then calls startAll().
 *
 * @param addrMap nameservice id -> (namenode id -> address)
 *                (presumably; key meanings inferred from variable names)
 */
private void doRefreshNamenodes(
    Map<String, Map<String, InetSocketAddress>> addrMap) throws IOException {
  assert Thread.holdsLock(refreshNamenodesLock);
  Set<String> toRefresh = Sets.newLinkedHashSet();
  Set<String> toAdd = Sets.newLinkedHashSet();
  Set<String> toRemove;
  
  synchronized (this) {
    // Step 1. For each of the new nameservices, figure out whether
    // it's an update of the set of NNs for an existing NS,
    // or an entirely new nameservice.
    /**
     * Under federation there will be multiple nameservices.
     */
    for (String nameserviceId : addrMap.keySet()) {
      if (bpByNameserviceId.containsKey(nameserviceId)) {
        toRefresh.add(nameserviceId);
      } else {
        toAdd.add(nameserviceId);
      }
    }
    
    // Step 2. Any nameservices we currently have but are no longer present
    // need to be removed.
    toRemove = Sets.newHashSet(Sets.difference(
        bpByNameserviceId.keySet(), addrMap.keySet()));
    
    assert toRefresh.size() + toAdd.size() ==
      addrMap.size() :
        "toAdd: " + Joiner.on(",").useForNull("<default>").join(toAdd) +
        "  toRemove: " + Joiner.on(",").useForNull("<default>").join(toRemove) +
        "  toRefresh: " + Joiner.on(",").useForNull("<default>").join(toRefresh);
    
    // Step 3. Start new nameservices
    if (!toAdd.isEmpty()) {
      LOG.info("Starting BPOfferServices for nameservices: " +
          Joiner.on(",").useForNull("<default>").join(toAdd));
      // Iterate over every nameservice to add; per the article, under HA
      // each nameservice contains two NameNodes.
      for (String nsToAdd : toAdd) {
        ArrayList<InetSocketAddress> addrs =
          Lists.newArrayList(addrMap.get(nsToAdd).values());
        // One nameservice maps to one BPOfferService.
        // Per the article, each NameNode inside a nameservice is one
        // BPServiceActor, so under HA one BPOfferService has two
        // BPServiceActors.
        BPOfferService bpos = createBPOS(addrs);
        bpByNameserviceId.put(nsToAdd, bpos);
        offerServices.add(bpos);
      }
    }
    // Start the services.
    startAll();
  }

第十九、二十步:在BlockPoolManager中遍历所有的BPOfferService,并分别调用其start()方法

/**
 * Starts every BPOfferService in offerServices, running as the login user
 * (excerpt; the catch/tail of the try block is elided).
 */
synchronized void startAll() throws IOException {
  try {
    UserGroupInformation.getLoginUser().doAs(
        new PrivilegedExceptionAction<Object>() {
          @Override
          public Object run() throws Exception {
            // Iterate over all BPOfferServices (one per nameservice,
            // per the article).
            for (BPOfferService bpos : offerServices) {
              // Start this BPOfferService.
              bpos.start();
            }
            return null;
          }
        });
  ...
}

第二十一步:遍历bpOfferService中的所有BPServiceActor,并调用其start()方法(下面第一个start()属于BPOfferService,第二个start()属于BPServiceActor)

/**
 * Starts every BPServiceActor held in bpServices.
 * NOTE(review): the article presents this as BPOfferService.start().
 */
void start() {
  // One BPOfferService can hold several actors.
  for (BPServiceActor actor : bpServices) {
    // Per the article, the actor performs DataNode registration and heartbeats.
    actor.start();
  }
}
/**
 * Launches this actor's worker thread unless one is already alive.
 * The thread runs this object (it is passed as the Runnable), is named by
 * formatThreadName() and is marked as a daemon thread.
 * NOTE(review): the article presents this as BPServiceActor.start().
 */
void start() {
  final boolean alreadyRunning = (bpThread != null) && bpThread.isAlive();
  if (!alreadyRunning) {
    bpThread = new Thread(this, formatThreadName());
    // Daemon thread; needed for JUnit testing.
    bpThread.setDaemon(true);
    // Starting the thread leads to this object's run() being executed.
    bpThread.start();
  }
}

第二十二步:由于BPServiceActor实现了Runnable接口,调用其start()方法,最后会执行其run()方法。方法内主要涉及两个重要的方法调用,分别是connectToNNAndHandshake()向NameNode注册、offerService()向NameNode发送心跳

/**
 * Thread body of the actor (excerpt; the tail of the outer try is elided).
 * Phase 1 loops on connectToNNAndHandshake() until it succeeds, sleeping
 * 5000 ms between attempts while shouldRetryInit() is true and returning
 * with state FAILED otherwise. Phase 2 sets the state to RUNNING and calls
 * offerService() repeatedly while shouldRun() is true, sleeping 5000 ms
 * after any exception.
 */
public void run() {
  LOG.info(this + " starting to offer service");
  try {
    while (true) {
      // init stuff
      try {
        // setup storage
        // Core of the registration: connect to the NN and handshake.
        connectToNNAndHandshake();
        break;
      } catch (IOException ioe) {
        // Initial handshake, storage recovery or registration failed
        runningState = RunningState.INIT_FAILED;
        if (shouldRetryInit()) {
          // Retry until all namenode's of BPOS failed initialization
          LOG.error("Initialization failed for " + this + " "
              + ioe.getLocalizedMessage());
          // Sleep for 5 seconds before retrying.
          sleepAndLogInterrupts(5000, "initializing");
        } else {
          runningState = RunningState.FAILED;
          LOG.fatal("Initialization failed for " + this + ". Exiting. ", ioe);
          return;
        }
      }
    }
    runningState = RunningState.RUNNING;
    while (shouldRun()) {
      try {
        // Send heartbeats (done inside offerService, per the article).
        offerService();
      } catch (Exception ex) {
        LOG.error("Exception in BPOfferService for " + this, ex);
        sleepAndLogInterrupts(5000, "offering service");
      }
    }
  ...
}

第二十三步:在connectToNNAndHandshake()方法内主要做了两件事,一是获取到NameNode的代理对象,二是向NameNode注册

/**
 * Connects to the NameNode at nnAddr and performs the two-phase handshake:
 * first retrieve and verify the namespace info, then register.
 *
 * @throws IOException if any step of the connection or handshake fails
 */
private void connectToNNAndHandshake() throws IOException {
  // get NN proxy
  // Obtain the proxy object for the namenode.
  bpNamenode = dn.connectToNN(nnAddr);
  // First phase of the handshake with NN - get the namespace
  // info.
  NamespaceInfo nsInfo = retrieveNamespaceInfo();
  
  // Verify that this matches the other NN in this HA pair.
  // This also initializes our block pool in the DN if we are
  // the first NN connection for this BP.
  bpos.verifyAndSetNamespaceInfo(nsInfo);
  
  // Second phase of the handshake with the NN.
  // Register with the NameNode.
  register(nsInfo);
}

第二十四步:在register()中首先创建注册信息bpRegistration,然后通过RPC调用服务端NameNodeRpcServer的registerDatanode()方法

/**
 * Registers this DataNode with the NameNode (excerpt; the catch clauses
 * and the tail of the method are elided).
 * Builds the registration via bpos.createRegistration(), then, while
 * shouldRun() is true, calls registerDatanode on the NameNode proxy and
 * stores the returned registration.
 *
 * @param nsInfo namespace info attached to the registration on success
 */
void register(NamespaceInfo nsInfo) throws IOException {
  // The handshake() phase loaded the block pool storage
  // off disk - so update the bpRegistration object from that info
  // Create the registration info.
  bpRegistration = bpos.createRegistration();
  LOG.info(this + " beginning handshake with NN");
  while (shouldRun()) {
    try {
      // Use returned registration from namenode with updated fields
      // Call the server's registerDatanode method to register.
      // Note (per the article): since this is an RPC call, what actually
      // runs is NameNodeRpcServer.registerDatanode() on the server side.
      bpRegistration = bpNamenode.registerDatanode(bpRegistration);
      // Reaching this point means the registration call has completed.
      bpRegistration.setNamespaceInfo(nsInfo);
      break;
    ...
}

第二十五步:查看NameNodeRpcServer的registerDatanode()方法,其内部调用了FSNamesystem.registerDatanode()

/**
 * RPC-side handler for a DataNode registration request.
 * Runs checkNNStartup() and verifySoftwareVersion() first, then delegates
 * to namesystem.registerDatanode.
 *
 * @param nodeReg registration sent by the DataNode
 * @return the same {@code nodeReg} object that was passed in
 * @throws IOException propagated from the checks or the registration
 */
public DatanodeRegistration registerDatanode(DatanodeRegistration nodeReg)
    throws IOException {
  checkNNStartup();
  verifySoftwareVersion(nodeReg);
  // Register the DataNode.
  namesystem.registerDatanode(nodeReg);
  return nodeReg;
}

第二十六步:调用了DatanodeManager.registerDatanode()方法

/**
 * Registers a DataNode while holding the write lock.
 * Delegates to the DatanodeManager, then calls checkSafeMode(); the write
 * lock is always released in the finally block.
 *
 * @param nodeReg registration sent by the DataNode
 * @throws IOException propagated from the delegated calls
 */
void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
  writeLock();
  try {
    // DatanodeManager is the component that handles DataNode-related work.
    getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
    checkSafeMode();
  } finally {
    writeUnlock();
  }
}

第二十七步:registerDatanode()主要调用了addDatanode()注册DataNode,并调用heartbeatManager.addDatanode()将注册完成的DataNode加入HeartbeatManager里面进行心跳管理

/**
 * Handles a DataNode registration inside the DatanodeManager (excerpt;
 * most of the method, including where nodeDescr is built, is elided).
 * The visible part adds the descriptor through addDatanode, hands it to
 * the heartbeatManager, bumps the software-version count and starts
 * decommissioning if the node is excluded.
 */
public void registerDatanode(DatanodeRegistration nodeReg)
    throws DisallowedDatanodeException, UnresolvedTopologyException {
  InetAddress dnAddress = Server.getRemoteIp();
     ....
      // register new datanode
      // Register the DataNode.
      addDatanode(nodeDescr);
      // also treat the registration message as a heartbeat
      // no need to update its timestamp
      // because its is done when the descriptor is created
      // Add the newly registered DataNode to the HeartbeatManager so
      // that its heartbeats are managed from now on.
      heartbeatManager.addDatanode(nodeDescr);
      incrementVersionCount(nodeReg.getSoftwareVersion());
      startDecommissioningIfExcluded(nodeDescr);
      success = true;
    ....
}

第二十八步:在addDatanode()中分别往各数据结构中添加datanode相关的信息

/**
 * Records a datanode descriptor in the manager's lookup structures:
 * datanodeMap (keyed by the datanode UUID), the network topology and
 * host2DatanodeMap.
 *
 * @param node descriptor of the datanode being added
 */
void addDatanode(final DatanodeDescriptor node) {
  // Keep host2DatanodeMap consistent with datanodeMap: whatever descriptor
  // the put() displaces is dropped from host2DatanodeMap before the new
  // node is added to host2DatanodeMap further down.
  synchronized(datanodeMap) {
    final DatanodeDescriptor displaced =
        datanodeMap.put(node.getDatanodeUuid(), node);
    host2DatanodeMap.remove(displaced);
  }

  // Add the node to the topology structure.
  networktopology.add(node); // may throw InvalidTopologyException
  // Add the node to the in-memory host lookup.
  host2DatanodeMap.add(node);
  checkIfClusterIsNowMultiRack(node);

  if (LOG.isDebugEnabled()) {
    final String message = getClass().getSimpleName() + ".addDatanode: "
        + "node " + node + " is added to datanodeMap.";
    LOG.debug(message);
  }
}

第二十九步:重新回到BPServiceActor.run()方法里面,查看offerService()方法,每3秒通过sendHeartBeat()发送一次心跳

/**
 * Main service loop of the actor (excerpt; heavily elided).
 * While shouldRun() is true, sends a heartbeat whenever at least
 * dnConf.heartBeatInterval has elapsed since lastHeartbeat.
 */
private void offerService() throws Exception {
  ....
  // Runs periodically.
  while (shouldRun()) {
    try {
      final long startTime = monotonicNow();

      // Heartbeat is due once heartBeatInterval has elapsed
      // (the article states this is every 3 seconds).
      if (startTime - lastHeartbeat >= dnConf.heartBeatInterval) {

        lastHeartbeat = startTime;
        if (!dn.areHeartbeatsDisabledForTests()) {
          // Author's note: the NameNode never connects to the DataNode
          // directly. The DataNode sends a heartbeat to the NameNode, the
          // NameNode replies with commands, and the DataNode acts on those
          // commands.
          // Send the heartbeat; the result is the NameNode's response.
          HeartbeatResponse resp = sendHeartBeat();
          ....
} // offerService

第三十步:在sendHeartBeat()方法内通过RPC调用NameNodeRpcServer的sendHeartbeat()方法向NameNode发送心跳

/**
 * Sends one heartbeat to the NameNode through the bpNamenode proxy
 * (excerpt; the code that prepares reports, numFailedVolumes and
 * volumeFailureSummary is elided).
 *
 * @return the NameNode's response to the heartbeat
 */
HeartbeatResponse sendHeartBeat() throws IOException {
  ...
  // Send the heartbeat through the NameNode proxy.
  // Per the article: since this is an RPC call, what actually runs is
  // NameNodeRpcServer.sendHeartbeat() on the server side.
  return bpNamenode.sendHeartbeat(bpRegistration,
      reports,
      dn.getFSDataset().getCacheCapacity(),
      dn.getFSDataset().getCacheUsed(),
      dn.getXmitsInProgress(),
      dn.getXceiverCount(),
      numFailedVolumes,
      volumeFailureSummary);
}

第三十一步:在NameNodeRpcServer的sendHeartbeat()方法内调用FSNamesystem.handleHeartbeat()处理DataNode发送过来的心跳

/**
 * RPC-side handler for a DataNode heartbeat.
 * Runs checkNNStartup() and verifyRequest() first, then delegates to
 * namesystem.handleHeartbeat.
 *
 * @return the response produced by namesystem.handleHeartbeat
 * @throws IOException propagated from the checks or the handler
 */
public HeartbeatResponse sendHeartbeat(DatanodeRegistration nodeReg,
    StorageReport[] report, long dnCacheCapacity, long dnCacheUsed,
    int xmitsInProgress, int xceiverCount,
    int failedVolumes, VolumeFailureSummary volumeFailureSummary)
    throws IOException {
  checkNNStartup();
  verifyRequest(nodeReg);
  // Handle the heartbeat sent by the DataNode.
  // Note: xceiverCount is passed before xmitsInProgress here, the reverse
  // of this method's own parameter order.
  return namesystem.handleHeartbeat(nodeReg, report,
      dnCacheCapacity, dnCacheUsed, xceiverCount, xmitsInProgress,
      failedVolumes, volumeFailureSummary);
}

第三十二步:通过调用DatanodeManager.handleHeartbeat()方法将DataNode发送过来的心跳转化为DatanodeCommand

/**
 * Processes a DataNode heartbeat on the NameNode side (excerpt; the
 * locking, the computation of maxTransfer/blockPoolId/rollingUpgradeInfo
 * and the tail are elided).
 * Obtains the commands from the DatanodeManager, builds the HA status and
 * returns both in a HeartbeatResponse.
 */
HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
    StorageReport[] reports, long cacheCapacity, long cacheUsed,
    int xceiverCount, int xmitsInProgress, int failedVolumes,
    VolumeFailureSummary volumeFailureSummary) throws IOException {
  ...
    // The NameNode processes the heartbeat sent by the DataNode.
    DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
        nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
        xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary);
    
    //create ha status
    final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
        haContext.getState().getServiceState(),
        getFSImage().getLastAppliedOrWrittenTxId());
    // Return the response to the DataNode.
    return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
  ...
}

第三十三步:DatanodeManager.handleHeartbeat()中首先获取已有的DataNode信息,然后调用HeartbeatManager.updateHeartbeat()方法更新心跳信息

/**
 * Handles a heartbeat inside the DatanodeManager (excerpt; the tail is
 * elided). Holds the heartbeatManager and datanodeMap monitors while it
 * looks up the node. Replies with a REGISTER command if the node is
 * unregistered, unknown or not alive; throws DisallowedDatanodeException
 * (after marking the node dead) if it is disallowed; otherwise updates the
 * heartbeat state through the heartbeatManager.
 */
public DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg,
    StorageReport[] reports, final String blockPoolId,
    long cacheCapacity, long cacheUsed, int xceiverCount, 
    int maxTransfers, int failedVolumes,
    VolumeFailureSummary volumeFailureSummary) throws IOException {
  synchronized (heartbeatManager) {
    synchronized (datanodeMap) {
      DatanodeDescriptor nodeinfo = null;
      try {
        // Look up the DataNode in the existing datanodeMap.
        // If it is found, the node has registered before; on a first
        // contact the datanodeMap holds no entry for it.
        nodeinfo = getDatanode(nodeReg);
      } catch(UnregisteredNodeException e) {
        return new DatanodeCommand[]{RegisterCommand.REGISTER};
      }
      
      // Check if this datanode should actually be shutdown instead. 
      if (nodeinfo != null && nodeinfo.isDisallowed()) {
        setDatanodeDead(nodeinfo);
        throw new DisallowedDatanodeException(nodeinfo);
      }
      if (nodeinfo == null || !nodeinfo.isAlive) {
        return new DatanodeCommand[]{RegisterCommand.REGISTER};
      }
      // Update the key heartbeat information for this node.
      heartbeatManager.updateHeartbeat(nodeinfo, reports,
                                       cacheCapacity, cacheUsed,
                                       xceiverCount, failedVolumes,
                                       volumeFailureSummary);
      ...
  }

第三十四步:在HeartbeatManager.updateHeartbeat()调用DatanodeDescriptor.updateHeartbeat()方法更新心跳信息

/**
 * Applies a heartbeat to the given node and keeps {@code stats} in step:
 * the node is subtracted from stats before the update and added back
 * afterwards.
 *
 * @param node descriptor whose heartbeat state is updated
 */
synchronized void updateHeartbeat(final DatanodeDescriptor node,
    StorageReport[] reports, long cacheCapacity, long cacheUsed,
    int xceiverCount, int failedVolumes,
    VolumeFailureSummary volumeFailureSummary) {
  stats.subtract(node);
  // Update the node's state from the heartbeat.
  node.updateHeartbeat(reports, cacheCapacity, cacheUsed,
    xceiverCount, failedVolumes, volumeFailureSummary);
  stats.add(node);
}

第三十五步:在DatanodeDescriptor.updateHeartbeat()方法内再调用updateHeartbeatState()进行心跳更新

/**
 * Updates this descriptor from a heartbeat by delegating to
 * updateHeartbeatState, then sets heartbeatedSinceRegistration to true.
 */
public void updateHeartbeat(StorageReport[] reports, long cacheCapacity,
    long cacheUsed, int xceiverCount, int volFailures,
    VolumeFailureSummary volumeFailureSummary) {
  // Update the state.
  updateHeartbeatState(reports, cacheCapacity, cacheUsed, xceiverCount,
      volFailures, volumeFailureSummary);
  heartbeatedSinceRegistration = true;
}

第三十六步:最终在updateHeartbeatState()方法实现所有的心跳信息更新

/**
 * Writes the values carried by a heartbeat into this descriptor (excerpt;
 * leading and trailing parts are elided).
 * The visible part stores the cache capacity/usage and xceiver count and
 * refreshes both the wall-clock and the monotonic last-update timestamps.
 */
public void updateHeartbeatState(StorageReport[] reports, long cacheCapacity,
    long cacheUsed, int xceiverCount, int volFailures,
    VolumeFailureSummary volumeFailureSummary) {
  ...
  // Update the storage/cache information.
  setCacheCapacity(cacheCapacity);
  setCacheUsed(cacheUsed);
  setXceiverCount(xceiverCount);
  // Record the time of this heartbeat as the last update time.
  setLastUpdate(Time.now());
  // Per the article, the heartbeat timestamps are what is used to judge
  // whether a DataNode is still alive.
  setLastUpdateMonotonic(Time.monotonicNow());
 ...
}
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值