Hadoop DataNode启动之数据目录校验

   DataNode在启动过程中会对数据目录进行较多的校验,比如是否需要升级、格式化,是否和NN版本一致,是否需要rollback,并最终更新current目录下的VERSION文件信息。下面从startDataNode这个函数入手介绍整个校验流程。这个函数比较长,包含的内容很多,这里只针对校验部分详细分析,其余相关内容以后再介绍。

 

  void startDataNode(Configuration conf,
                     AbstractList<File>dataDirs, SecureResources resources
                     ) throws IOException {
    if(UserGroupInformation.isSecurityEnabled()&& resources == null)
      throw new RuntimeException("Cannotstart secure cluster without " +
               "privilegedresources.");
   
    this.secureResources = resources;
    //获得本地主机名,如果没有设置slave.host.name则通过下面两个参数,通过网口和DNS来获得
    if (conf.get("slave.host.name")!= null) {
      machineName =conf.get("slave.host.name");  
    }
    if (machineName == null) {
    //注意两个可配置参数,网口:ethX DNS服务器:1.1.1.1类似这种形式
      machineName = DNS.getDefaultHost(
                                     conf.get("dfs.datanode.dns.interface","default"),
                                    conf.get("dfs.datanode.dns.nameserver","default"));
    }
    //获得配置文件中NN的地址,用于下面创建动态代理,因为在创建动态代理时要和NN通信
    InetSocketAddress nameNodeAddr =NameNode.getServiceAddress(conf, true);
   
    //socket连接超时时间
    this.socketTimeout =  conf.getInt("dfs.socket.timeout",
                                     HdfsConstants.READ_TIMEOUT);
    //socket写超时时间
    this.socketWriteTimeout =conf.getInt("dfs.datanode.socket.write.timeout",
                                         HdfsConstants.WRITE_TIMEOUT);
    //影响本datanode向客户端或其他datanode发送数据块的缓存分配尺寸,具体可见sendBlock函数
    this.transferToAllowed =conf.getBoolean("dfs.datanode.transferTo.allowed",
                                            true);
    //写包的大小
    this.writePacketSize =conf.getInt("dfs.write.packet.size", 64*1024);
   
    //创建注册体,用于DN向NN注册时,服务端的校验
    InetSocketAddress socAddr =DataNode.getStreamingAddr(conf);
    int tmpPort = socAddr.getPort();
    storage = new DataStorage();
    // construct registration
    this.dnRegistration = newDatanodeRegistration(machineName + ":" + tmpPort);
 
    // 创建代理并通过握手获得NN的版本、ID信息
    this.namenode = (DatanodeProtocol)
      RPC.waitForProxy(DatanodeProtocol.class,
                      DatanodeProtocol.versionID,
                       nameNodeAddr,
                       conf);
    // get version and id info from thename-node
    NamespaceInfo nsInfo = handshake();
    StartupOption startOpt =getStartupOption(conf);
    assert startOpt != null : "Startupoption must be set.";
   
    boolean simulatedFSDataset =
       conf.getBoolean("dfs.datanode.simulateddatastorage", false);
    if (simulatedFSDataset) {
     //因为我这里不是伪分布式,所以会走下面的逻辑
    } else {
      // 这里才开始校验,也是我们重点关注的部分
      storage.recoverTransitionRead(nsInfo,dataDirs, startOpt);
      // adjust
     this.dnRegistration.setStorageInfo(storage);
      //initialize data node internal structure
      this.data = new FSDataset(storage, conf);
    }
     
         .................
  }

下面看一下真正的校验过程,总的来说分三步:1、对数据目录做一致性检查;2、执行升级或回滚的操作流程;3、更新版本文件信息。

  void recoverTransitionRead(NamespaceInfo nsInfo,
                            Collection<File> dataDirs,
                             StartupOptionstartOpt
                             ) throws IOException {
    assert FSConstants.LAYOUT_VERSION == nsInfo.getLayoutVersion() :
      "Data-node and name-node layout versions must be thesame.";
   
    // 1. For each data directory calculate its state and
    // check whether all is consistent before transitioning.
    // Format and recover.
    this.storageID = "";
    this.storageDirs = new ArrayList<StorageDirectory>(dataDirs.size());
    ArrayList<StorageState> dataDirStates = new ArrayList<StorageState>(dataDirs.size());
    for(Iterator<File> it =dataDirs.iterator(); it.hasNext();) {
      File dataDir = it.next();
      StorageDirectory sd = new StorageDirectory(dataDir);
      StorageState curState;
      try {
        //数据目录状态分析,是否存在,权限分析、是否需要升级
        curState = sd.analyzeStorage(startOpt);
        // 根据检测后的状态分别执行不同操作,正常启动、格式化、恢复
        switch(curState) {
        case NORMAL:
          break;
        case NON_EXISTENT:
          // 数据目录不存在,则直接忽略
          LOG.info("Storage directory " + dataDir + " does not exist.");
          it.remove();
          continue;
        case NOT_FORMATTED: // format
          LOG.info("Storage directory " + dataDir + " is not formatted.");
          LOG.info("Formatting ...");
          //数据目录格式化
          format(sd, nsInfo);
          break;
        default:  // 从上一次升级或回滚的失败中恢复
          sd.doRecover(curState);
        }
      } catch (IOException ioe) {
        sd.unlock();
        throw ioe;
      }
      // add to the storage list
      addStorageDir(sd);
      dataDirStates.add(curState);
    }
 
    if (dataDirs.size() == 0)  // none of the data dirs exist
      throw new IOException(
                            "All specified directories are notaccessible or do not exist.");
 
    // 2.执行真正的升级或回滚操作
    for(int idx = 0; idx < getNumStorageDirs(); idx++) {
      doTransition(getStorageDir(idx), nsInfo,startOpt);
      assert this.getLayoutVersion() == nsInfo.getLayoutVersion() :
        "Data-node and name-node layoutversions must be the same.";
      assert this.getCTime() == nsInfo.getCTime() :
        "Data-node and name-node CTimes mustbe the same.";
    }
   
    // 3. 更新所有目录的版本文件信息
    this.writeAll();
  }
现在看analyzeStorage是如何分析数据目录状态的,其结果决定了后两步的操作:

    public StorageState analyzeStorage(StartupOptionstartOpt) throws IOException {
      assert root != null : "rootis null";
      String rootPath = root.getCanonicalPath();
      try { // 是否存在
        if (!root.exists()) {
          // storage directory does not exist
          if (startOpt != StartupOption.FORMAT) {
            LOG.info("Storage directory " + rootPath + " does not exist.");
            return StorageState.NON_EXISTENT;
          }
          LOG.info(rootPath + " does not exist. Creating ...");
          if (!root.mkdirs())
            throw new IOException("Cannotcreate directory " + rootPath);
        }
        // 是否为一个目录
        if (!root.isDirectory()) {
          LOG.info(rootPath + "is not a directory.");
          return StorageState.NON_EXISTENT;
        }
        //是否有些权限
        if (!root.canWrite()) {
          LOG.info("Cannot access storage directory" + rootPath);
          return StorageState.NON_EXISTENT;
        }
      } catch(SecurityException ex) {
        LOG.info("Cannot access storage directory" + rootPath, ex);
        return StorageState.NON_EXISTENT;
      }
 
      this.lock(); // 对数据目录加锁,防止并发访问
 
      if (startOpt == HdfsConstants.StartupOption.FORMAT)
        return StorageState.NOT_FORMATTED;
      if (startOpt != HdfsConstants.StartupOption.IMPORT) {
        //make sure no conversion is required
        checkConversionNeeded(this);
      }
 
      // 获得版本文件
      File versionFile = getVersionFile();
      boolean hasCurrent = versionFile.exists();
 
      // 一系列的临时文件校验,如果这些临时目录存在,则说明这个存储是不正常的,下面会看到这些校验
      boolean hasPrevious = getPreviousDir().exists();
      boolean hasPreviousTmp = getPreviousTmp().exists();
      boolean hasRemovedTmp = getRemovedTmp().exists();
      boolean hasFinalizedTmp =getFinalizedTmp().exists();
      boolean hasCheckpointTmp = getLastCheckpointTmp().exists();
 
      if (!(hasPreviousTmp || hasRemovedTmp
          || hasFinalizedTmp ||hasCheckpointTmp)) {
        // no temp dirs - no recovery
        if (hasCurrent)
          return StorageState.NORMAL;
        if (hasPrevious)
          throw new InconsistentFSStateException(root,
                              "version file in current directory ismissing.");
        return StorageState.NOT_FORMATTED;
      }
 
      if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0)
          + (hasFinalizedTmp?1:0) +(hasCheckpointTmp?1:0) > 1)
        // more than one temp dirs
        throw new InconsistentFSStateException(root,
                                               "too many temporary directories.");
 
      // # of temp dirs == 1 should eitherrecover or complete a transition
      if (hasCheckpointTmp) {
        return hasCurrent ? StorageState.COMPLETE_CHECKPOINT
                          : StorageState.RECOVER_CHECKPOINT;
      }
 
      if (hasFinalizedTmp) {
        if (hasPrevious)
          throw new InconsistentFSStateException(root,
                                                STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED
                                                + "cannotexist together.");
        return StorageState.COMPLETE_FINALIZE;
      }
 
      if (hasPreviousTmp) {
        if (hasPrevious)
          throw new InconsistentFSStateException(root,
                                                STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS
                                                + "cannot exist together.");
        if (hasCurrent)
          return StorageState.COMPLETE_UPGRADE;
        return StorageState.RECOVER_UPGRADE;
      }
     
      assert hasRemovedTmp : "hasRemovedTmp must be true";
      if (!(hasCurrent ^ hasPrevious))
        throw new InconsistentFSStateException(root,
                                               "one and only one directory " + STORAGE_DIR_CURRENT
                                              + " or" + STORAGE_DIR_PREVIOUS
                                              + "must be present when " + STORAGE_TMP_REMOVED
                                              + "exists.");
      if (hasCurrent)
        return StorageState.COMPLETE_ROLLBACK;
      return StorageState.RECOVER_ROLLBACK;
}
 
在第二步中会做升级或回滚的操作;如果是正常启动(本地的layout版本和cTime都与NN一致),则直接返回:

private void doTransition( StorageDirectory sd,
                             NamespaceInfonsInfo,
                             StartupOptionstartOpt
                             ) throws IOException {
    //是否需要回滚
    if (startOpt == StartupOption.ROLLBACK)
      doRollback(sd, nsInfo); // rollback if applicable
    //读取版本文件信息
    sd.read();
    //检测版本文件
    checkVersionUpgradable(this.layoutVersion);
    assert this.layoutVersion >= FSConstants.LAYOUT_VERSION :
      "Future version is not allowed";
    //namespaceid校验
    if (getNamespaceID() !=nsInfo.getNamespaceID())
      throw new IOException(
                            "Incompatible namespaceIDs in " +        sd.getRoot().getCanonicalPath()
                            + ": namenode namespaceID = " + nsInfo.getNamespaceID()
                            + "; datanode namespaceID = " + getNamespaceID());
   //layout版本校验
    if (this.layoutVersion == FSConstants.LAYOUT_VERSION
        && this.cTime == nsInfo.getCTime())
      return; //regular startup
    // verify necessity of a distributed upgrade
    verifyDistributedUpgradeProgress(nsInfo);
    if (this.layoutVersion > FSConstants.LAYOUT_VERSION
        || this.cTime < nsInfo.getCTime()) {
      //执行升级操作
      doUpgrade(sd, nsInfo);  // upgrade
      return;
    }
    // layoutVersion == LAYOUT_VERSION && this.cTime> nsInfo.cTime
    // must shutdown
    throw new IOException("Datanodestate: LV = " + this.getLayoutVersion()
                          + " CTime = " + this.getCTime()
                          + " is newer than the namespace state:LV = "
                          +nsInfo.getLayoutVersion()
                          + " CTime = " + nsInfo.getCTime());
  }
至此数据目录的校验如果成功,则会继续执行,需要注意的是在升级和回滚阶段的操作还是比较复杂的,如果正常启动则比较简单。下一篇讲dn内部数据结构的初始化。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值