YARN源码剖析:NM子服务启动

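当NM的服务初始化完成后,就会进入启动阶段:先执行NodeManager自己的serviceStart()完成安全登录,再调用父类CompositeService的serviceStart()逐个启动init阶段添加的子服务。两段代码依次如下: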

/**
   * Starts the NodeManager service.
   *
   * Performs the secure login via {@code doSecureLogin()} first and only then
   * delegates to the superclass {@code serviceStart()}.
   *
   * @throws YarnRuntimeException if {@code doSecureLogin()} fails with an
   *         {@link IOException}; the original exception is kept as the cause
   * @throws Exception whatever the superclass {@code serviceStart()} throws
   */
@Override
  protected void serviceStart() throws Exception {
    try {
      doSecureLogin();
    } catch (IOException e) {
      // Surface a login failure as an unchecked exception, preserving the cause.
      throw new YarnRuntimeException("Failed NodeManager login", e);
    }
    super.serviceStart();
  }
/**
   * Starts every child service of this composite, in the order returned by
   * {@code getServices()}, and then delegates to the superclass
   * {@code serviceStart()}.
   *
   * @throws Exception if any child's {@code start()} or the superclass
   *         {@code serviceStart()} throws; remaining children are not started
   */
protected void serviceStart() throws Exception {
    // The full list of services that were added to this composite during init.
    final List<Service> children = getServices();
    if (LOG.isDebugEnabled()) {
      LOG.debug(getName() + ": starting services, size=" + children.size());
    }
    // Bring the children up one at a time. Per the original note: if a
    // start() fails, that service will be stopped and an exception raised.
    for (final Service child : children) {
      child.start();
    }
    super.serviceStart();
  }

相关service列表有:

DeletionService 文件清理服务
NodeHealthCheckerService 节点健康检查服务
NodeResourceMonitor 节点资源监控服务
ContainerManager 容器管理服务
WebServer web服务
AsyncDispatcher 事件分发器
NodeStatusUpdater 节点状态更新服务

DeletionService

没有覆盖父类AbstractService.java中的serviceStart() 方法

NodeHealthCheckerService

没有覆盖父类CompositeService中的serviceStart()方法
子服务NodeHealthScriptRunner

/**
   * Starts the node health monitoring.
   *
   * When {@code shouldRun(conf)} is false nothing is scheduled and the
   * superclass {@code serviceStart()} is NOT invoked; the method only logs
   * and returns. Otherwise a daemon {@link Timer} named
   * "NodeHealthMonitor-Timer" is created and the {@code timer} task is
   * scheduled at a fixed rate.
   *
   * @throws Exception whatever the superclass {@code serviceStart()} throws
   */
  @Override
  protected void serviceStart() throws Exception {
    if (shouldRun(conf)) {
      // Daemon timer (second constructor argument is true).
      nodeHealthScriptScheduler = new Timer("NodeHealthMonitor-Timer", true);
      // First execution happens immediately (delay 0), then repeats every
      // intervalTime. Per the original note, the task runs the health
      // shell script.
      nodeHealthScriptScheduler.scheduleAtFixedRate(timer, 0, intervalTime);
      super.serviceStart();
    } else {
      // No health script to run (see shouldRun), so no timer thread is started.
      LOG.info("Not starting node health monitor");
    }
  }

子服务LocalDirsHandlerService

 /**
  * Starts the disk health monitoring when it is enabled, then delegates to
  * the superclass {@code serviceStart()} in either case.
  *
  * @throws Exception whatever the superclass {@code serviceStart()} throws
  */
 @Override
 protected void serviceStart() throws Exception {
   if (isDiskHealthCheckerEnabled) {
     // Daemon timer that drives the periodic directory checks.
     dirsHandlerScheduler = new Timer("DiskHealthMonitor-Timer", true);
     // The same interval is used both as the initial delay and as the
     // repeat period of the monitoring task.
     final long checkInterval = diskHealthCheckInterval;
     dirsHandlerScheduler.scheduleAtFixedRate(
         monitoringTimerTask, checkInterval, checkInterval);
   }
   super.serviceStart();
 }

检测函数如下,因为是对文件系统的检测,无非是读写执行这些权限的检查

/**
    * Runs one round of directory checks by delegating to
    * {@code checkDirs()}.
    * NOTE(review): presumably this is the body of the timer task scheduled by
    * the disk health monitor; the enclosing class is not visible here.
    */
@Override
   public void run() {
     checkDirs();
   }
/**
  * Re-checks the local and log directories, logs whether any directory
  * turned bad or turned good during this round, refreshes the directory
  * configuration when either check reported a change, and records the time
  * of the check in {@code lastDisksCheckTime}.
  */
private void checkDirs() {
   // Snapshot of the failed directories before this round of checks.
   final Set<String> localFailedBefore =
       new HashSet<String>(localDirs.getFailedDirs());
   final Set<String> logFailedBefore =
       new HashSet<String>(logDirs.getFailedDirs());

   // Both collections are always checked; neither call is short-circuited.
   final boolean localDirsChanged = localDirs.checkDirs();
   final boolean logDirsChanged = logDirs.checkDirs();

   // Snapshot of the failed directories after the checks.
   final Set<String> localFailedAfter =
       new HashSet<String>(localDirs.getFailedDirs());
   final Set<String> logFailedAfter =
       new HashSet<String>(logDirs.getFailedDirs());

   // Local dirs are compared first.
   boolean anyTurnedBad = disksTurnedBad(localFailedBefore, localFailedAfter);
   boolean anyTurnedGood =
       disksTurnedGood(localFailedBefore, localFailedAfter);

   // The log dirs are only compared when the local dirs did not already
   // yield the answer, since the status gets logged anyway.
   anyTurnedBad =
       anyTurnedBad || disksTurnedBad(logFailedBefore, logFailedAfter);
   anyTurnedGood =
       anyTurnedGood || disksTurnedGood(logFailedBefore, logFailedAfter);

   logDiskStatus(anyTurnedBad, anyTurnedGood);

   if (localDirsChanged || logDirsChanged) {
     updateDirsAfterTest();
   }

   lastDisksCheckTime = System.currentTimeMillis();
 }

NodeResourceMonitor

NodeResourceMonitorImpl没有覆盖父类CompositeService的serviceStart方法

ContainerManager

/**
   * Starts the container manager: creates its RPC server for
   * {@code ContainerManagementProtocol}, establishes the node ID, starts the
   * remaining child services, and (when recovery is possible) opens the RPC
   * server only after the recovered containers have been waited for.
   *
   * @throws IllegalArgumentException if the state store can recover but the
   *         configured address uses port 0 (an ephemeral port)
   * @throws IOException if, after a delayed server start, the node ID built
   *         from the server's address differs from the one set earlier
   * @throws Exception anything else thrown while starting
   */
@Override
  protected void serviceStart() throws Exception {

    // NOTE(review): the comment below does not match anything this method
    // visibly does; it looks stale.
    // Enqueue user dirs in deletion context

    Configuration conf = getConfig();
    // Resolve the initial RPC address from the bind-host and address
    // settings, falling back to the default address and port.
    final InetSocketAddress initialAddress = conf.getSocketAddr(
        YarnConfiguration.NM_BIND_HOST,
        YarnConfiguration.NM_ADDRESS,
        YarnConfiguration.DEFAULT_NM_ADDRESS,
        YarnConfiguration.DEFAULT_NM_PORT);
    // Port 0 means an ephemeral port, which is rejected when the state
    // store supports recovery.
    boolean usingEphemeralPort = (initialAddress.getPort() == 0);
    if (context.getNMStateStore().canRecover() && usingEphemeralPort) {
      throw new IllegalArgumentException("Cannot support recovery with an "
          + "ephemeral server port. Check the setting of "
          + YarnConfiguration.NM_ADDRESS);
    }
    // If recovering then delay opening the RPC service until the recovery
    // of resources and containers have completed, otherwise requests from
    // clients during recovery can interfere with the recovery process.
    final boolean delayedRpcServerStart =
        context.getNMStateStore().canRecover();

    // Work on a copy so the authentication override below does not touch
    // the service's own configuration object.
    Configuration serverConf = new Configuration(conf);

    // always enforce it to be token-based.
    serverConf.set(
      CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
      SaslRpcServer.AuthMethod.TOKEN.toString());
    
    // Instantiate the YarnRPC implementation. Per the original note: the
    // class is configured by yarn.ipc.rpc.class and defaults to
    // org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC.
    // Note that the factory is given conf, while the server below is given
    // serverConf.
    YarnRPC rpc = YarnRPC.create(conf);
    
    // Create the RPC server for ContainerManagementProtocol. Per the
    // original note, it receives the control requests sent by AMs.
    server =
        rpc.getServer(ContainerManagementProtocol.class, this, initialAddress, 
            serverConf, this.context.getNMTokenSecretManager(),
            conf.getInt(YarnConfiguration.NM_CONTAINER_MGR_THREAD_COUNT, 
                YarnConfiguration.DEFAULT_NM_CONTAINER_MGR_THREAD_COUNT));
    
    // Enable service authorization?
    if (conf.getBoolean(
        CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, 
        false)) {
      refreshServiceAcls(conf, new NMPolicyProvider());
    }
    
    // While the container manager RPC server is still starting, new
    // container requests from clients are blocked.
    LOG.info("Blocking new container-requests as container manager rpc" +
    		" server is still starting.");
    this.setBlockNewContainerRequests(true);

    String bindHost = conf.get(YarnConfiguration.NM_BIND_HOST);
    String nmAddress = conf.getTrimmed(YarnConfiguration.NM_ADDRESS);
    String hostOverride = null;
    if (bindHost != null && !bindHost.isEmpty()
        && nmAddress != null && !nmAddress.isEmpty()) {
      //a bind-host case with an address, to support overriding the first
      //hostname found when querying for our hostname with the specified
      //address, combine the specified address with the actual port listened
      //on by the server
      // Only the part of the address before the first ':' is kept.
      hostOverride = nmAddress.split(":")[0];
    }

    // setup node ID
    InetSocketAddress connectAddress;
    if (delayedRpcServerStart) {
      // Server is not started yet: derive the connect address from the
      // configured address instead of from the running server.
      connectAddress = NetUtils.getConnectAddress(initialAddress);
    } else {
      server.start();
      connectAddress = NetUtils.getConnectAddress(server);
    }
    NodeId nodeId = buildNodeId(connectAddress, hostOverride);
    // Publish the node ID to the context and to both secret managers.
    ((NodeManager.NMContext)context).setNodeId(nodeId);
    this.context.getNMTokenSecretManager().setNodeId(nodeId);
    this.context.getContainerTokenSecretManager().setNodeId(nodeId);

    // start remaining services
    super.serviceStart();

    if (delayedRpcServerStart) {
      // Open the RPC server only after the recovered containers have been
      // waited for.
      waitForRecoveredContainers();
      server.start();

      // check that the node ID is as previously advertised
      connectAddress = NetUtils.getConnectAddress(server);
      NodeId serverNode = buildNodeId(connectAddress, hostOverride);
      if (!serverNode.equals(nodeId)) {
        throw new IOException("Node mismatch after server started, expected '"
            + nodeId + "' but found '" + serverNode + "'");
      }
    }

    LOG.info("ContainerManager started at " + connectAddress);
    LOG.info("ContainerManager bound to " + initialAddress);
  }

相关参数:
yarn.nodemanager.bind-host NM进程绑定IP,默认0.0.0.0
yarn.nodemanager.address NM进程绑定地址,一般配置为0.0.0.0:45454
yarn.ipc.rpc.class rpc类,默认是org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC
yarn.nodemanager.container-manager.thread-count rpc处理handler的线程数,默认20

ContainerManager子服务之ResourceLocalizationService

/**
   * Starts the resource localization service: schedules the periodic cache
   * clean-up, creates and starts the localizer RPC server, records the
   * address it is reachable on, and finally delegates to the superclass
   * {@code serviceStart()}.
   *
   * @throws Exception if server creation/start or the superclass start fails
   */
@Override
  public void serviceStart() throws Exception {
    // Clean-up task: first run after cacheCleanupPeriod, then again with the
    // same fixed delay between runs (milliseconds).
    final CacheCleanup cleanupTask = new CacheCleanup(dispatcher);
    cacheCleanup.scheduleWithFixedDelay(
        cleanupTask, cacheCleanupPeriod, cacheCleanupPeriod,
        TimeUnit.MILLISECONDS);

    // Build the localizer RPC server and bring it up.
    server = createServer();
    server.start();

    // Record the connect address in the configuration, based on the address
    // the server is actually listening on.
    localizationServerAddress = getConfig().updateConnectAddr(
        YarnConfiguration.NM_BIND_HOST,
        YarnConfiguration.NM_LOCALIZER_ADDRESS,
        YarnConfiguration.DEFAULT_NM_LOCALIZER_ADDRESS,
        server.getListenerAddress());

    LOG.info("Localizer started on port " + server.getPort());
    super.serviceStart();
  }
/**
   * Builds (but does not start) the RPC server that serves
   * {@code LocalizationProtocol}.
   *
   * A {@code LocalizerTokenSecretManager} is installed in
   * {@code secretManager} only when security is enabled. Service ACLs are
   * refreshed on the new server when Hadoop security authorization is
   * switched on in the configuration (default: off).
   *
   * @return the newly created, not yet started server
   */
Server createServer() {
    final Configuration localizerConf = getConfig();
    final YarnRPC rpcFactory = YarnRPC.create(localizerConf);

    if (UserGroupInformation.isSecurityEnabled()) {
      secretManager = new LocalizerTokenSecretManager();
    }

    // Number of handler threads for the localizer server.
    final int handlerCount = localizerConf.getInt(
        YarnConfiguration.NM_LOCALIZER_CLIENT_THREAD_COUNT,
        YarnConfiguration.DEFAULT_NM_LOCALIZER_CLIENT_THREAD_COUNT);

    // See the LocalizationProtocol interface for the calls being served.
    final Server rpcServer = rpcFactory.getServer(
        LocalizationProtocol.class, this, localizationServerAddress,
        localizerConf, secretManager, handlerCount);

    // Enable service authorization?
    final boolean authorizationOn = localizerConf.getBoolean(
        CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false);
    if (authorizationOn) {
      rpcServer.refreshServiceAcl(localizerConf, new NMPolicyProvider());
    }

    return rpcServer;
  }

相关参数:
yarn.nodemanager.localizer.client.thread-count 处理资源本地化请求的线程数,默认5

ContainerManager子服务之ContainersLauncher

无serviceStart()

ContainerManager子服务之AuxServices

/**
   * Starts every auxiliary service held in {@code serviceMap}, registers
   * this object as a listener on each of them, and stores any non-null
   * metadata a service exposes in {@code serviceMetaData} under the
   * service's name. Finally delegates to the superclass
   * {@code serviceStart()}.
   *
   * @throws Exception if an auxiliary service or the superclass fails to start
   */
@Override
  public void serviceStart() throws Exception {
    // TODO fork(?) services running as configured user
    //      monitor for health, shutdown/restart(?) if any should die
    for (Map.Entry<String, AuxiliaryService> registered
        : serviceMap.entrySet()) {
      final String auxName = registered.getKey();
      final AuxiliaryService aux = registered.getValue();

      aux.start();
      aux.registerServiceListener(this);

      // Services without metadata are simply not recorded.
      final ByteBuffer metaData = aux.getMetaData();
      if (metaData == null) {
        continue;
      }
      serviceMetaData.put(auxName, metaData);
    }
    super.serviceStart();
  }

ContainerManager子服务之ContainersMonitor

/**
   * Starts the monitoring thread when monitoring is enabled, then delegates
   * to the superclass {@code serviceStart()} in either case.
   *
   * @throws Exception whatever the superclass {@code serviceStart()} throws
   */
@Override
  protected void serviceStart() throws Exception {
    final boolean monitoringEnabled = this.isEnabled();
    if (monitoringEnabled) {
      // Kicks off the monitor thread, which then executes its run() loop.
      this.monitoringThread.start();
    }
    super.serviceStart();
  }
/**
     * Monitoring loop for tracked containers.
     *
     * Each iteration: (1) moves newly added containers into
     * {@code trackingContainers}, (2) drops containers queued for removal,
     * (3) for every tracked container refreshes its process tree, logs its
     * memory usage, optionally records metrics, and dispatches a
     * {@code ContainerKillEvent} when the enabled virtual or physical memory
     * check reports the tree over its limit, then (4) sleeps for
     * {@code monitoringInterval}. The loop ends only when that sleep is
     * interrupted.
     */
@Override
    public void run() {

      while (true) {

        // Print the processTrees for debugging.
        if (LOG.isDebugEnabled()) {
          StringBuilder tmp = new StringBuilder("[ ");
          for (ProcessTreeInfo p : trackingContainers.values()) {
            tmp.append(p.getPID());
            tmp.append(" ");
          }
          // substring(0, length()) is the whole buffer content.
          LOG.debug("Current ProcessTree list : "
              + tmp.substring(0, tmp.length()) + "]");
        }

        // Add new containers
        // Move every pending container into the tracked set, then clear the
        // pending map, all while holding its lock.
        synchronized (containersToBeAdded) {
          for (Entry<ContainerId, ProcessTreeInfo> entry : containersToBeAdded
              .entrySet()) {
            ContainerId containerId = entry.getKey();
            ProcessTreeInfo processTreeInfo = entry.getValue();
            LOG.info("Starting resource-monitoring for " + containerId);
            trackingContainers.put(containerId, processTreeInfo);
          }
          containersToBeAdded.clear();
        }

        // Remove finished containers
        // Stop tracking each container queued for removal; when container
        // metrics are enabled, mark its metrics as finished first.
        synchronized (containersToBeRemoved) {
          for (ContainerId containerId : containersToBeRemoved) {
            if (containerMetricsEnabled) {
              ContainerMetrics.forContainer(
                  containerId, containerMetricsPeriodMs).finished();
            }
            trackingContainers.remove(containerId);
            LOG.info("Stopping resource-monitoring for " + containerId);
          }
          containersToBeRemoved.clear();
        }

        // Now do the monitoring for the trackingContainers
        // Check memory usage and kill any overflowing containers
        // Inspect every tracked container and kill those whose resource
        // usage exceeds the limit.
        // NOTE(review): the two totals below are accumulated but never read
        // within this method.
        long vmemStillInUsage = 0;
        long pmemStillInUsage = 0;
        // Explicit iterator so that over-limit entries can be removed
        // during the traversal via it.remove().
        for (Iterator<Map.Entry<ContainerId, ProcessTreeInfo>> it =
            trackingContainers.entrySet().iterator(); it.hasNext();) {

          Map.Entry<ContainerId, ProcessTreeInfo> entry = it.next();
          ContainerId containerId = entry.getKey();
          ProcessTreeInfo ptInfo = entry.getValue();
          try {
            String pId = ptInfo.getPID();

            // Initialize any uninitialized processTrees
            // A null PID means the process tree still has to be initialized.
            if (pId == null) {
              // get pid from ContainerId
              pId = containerExecutor.getProcessId(ptInfo.getContainerId());
              if (pId != null) {
                // pId will be null, either if the container is not spawned yet
                // or if the container's pid is removed from ContainerExecutor
                LOG.debug("Tracking ProcessTree " + pId
                    + " for the first time");

                ResourceCalculatorProcessTree pt =
                    ResourceCalculatorProcessTree.getResourceCalculatorProcessTree(pId, processTreeClass, conf);
                ptInfo.setPid(pId);
                ptInfo.setProcessTree(pt);

                if (containerMetricsEnabled) {
                  ContainerMetrics usageMetrics = ContainerMetrics
                      .forContainer(containerId, containerMetricsPeriodMs);
                  int cpuVcores = ptInfo.getCpuVcores();
                  // >> 20 divides by 2^20; presumably bytes -> MB (TODO confirm
                  // the unit of the limits).
                  final int vmemLimit = (int) (ptInfo.getVmemLimit() >> 20);
                  final int pmemLimit = (int) (ptInfo.getPmemLimit() >> 20);
                  usageMetrics.recordResourceLimit(
                      vmemLimit, pmemLimit, cpuVcores);
                  usageMetrics.recordProcessId(pId);
                }
              }
            }
            // End of initializing any uninitialized processTrees

            // Still no PID after the initialization attempt: skip this
            // container for the current iteration.
            if (pId == null) {
              continue; // processTree cannot be tracked
            }

            LOG.debug("Constructing ProcessTree for : PID = " + pId
                + " ContainerId = " + containerId);
            ResourceCalculatorProcessTree pTree = ptInfo.getProcessTree();
            pTree.updateProcessTree();    // update process-tree
            long currentVmemUsage = pTree.getVirtualMemorySize();
            long currentPmemUsage = pTree.getRssMemorySize();
            // if machine has 6 cores and 3 are used,
            // cpuUsagePercentPerCore should be 300% and
            // cpuUsageTotalCoresPercentage should be 50%
            float cpuUsagePercentPerCore = pTree.getCpuUsagePercent();
            float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore /
                resourceCalculatorPlugin.getNumProcessors();

            // Multiply by 1000 to avoid losing data when converting to int
            int milliVcoresUsed = (int) (cpuUsageTotalCoresPercentage * 1000
                * maxVCoresAllottedForContainers /nodeCpuPercentageForYARN);
            // as processes begin with an age 1, we want to see if there
            // are processes more than 1 iteration old.
            long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
            long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
            long vmemLimit = ptInfo.getVmemLimit();
            long pmemLimit = ptInfo.getPmemLimit();
            // Logged at INFO for every tracked container on every iteration.
            LOG.info(String.format(
                "Memory usage of ProcessTree %s for container-id %s: ",
                     pId, containerId.toString()) +
                formatUsageString(
                    currentVmemUsage, vmemLimit, currentPmemUsage, pmemLimit));

            // Add usage to container metrics
            if (containerMetricsEnabled) {
              ContainerMetrics.forContainer(
                  containerId, containerMetricsPeriodMs).recordMemoryUsage(
                  (int) (currentPmemUsage >> 20));
              ContainerMetrics.forContainer(
                  containerId, containerMetricsPeriodMs).recordCpuUsage
                  ((int)cpuUsagePercentPerCore, milliVcoresUsed);
            }

            boolean isMemoryOverLimit = false;
            String msg = "";
            int containerExitStatus = ContainerExitStatus.INVALID;
            // The virtual-memory check takes precedence; the physical-memory
            // check is only evaluated when the first branch is not taken.
            if (isVmemCheckEnabled()
                && isProcessTreeOverLimit(containerId.toString(),
                    currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
              // Container (the root process) is still alive and overflowing
              // memory.
              // Dump the process-tree and then clean it up.
              msg = formatErrorMessage("virtual",
                  currentVmemUsage, vmemLimit,
                  currentPmemUsage, pmemLimit,
                  pId, containerId, pTree);
              isMemoryOverLimit = true;
              containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
            } else if (isPmemCheckEnabled()
                && isProcessTreeOverLimit(containerId.toString(),
                    currentPmemUsage, curRssMemUsageOfAgedProcesses,
                    pmemLimit)) {
              // Container (the root process) is still alive and overflowing
              // memory.
              // Dump the process-tree and then clean it up.
              msg = formatErrorMessage("physical",
                  currentVmemUsage, vmemLimit,
                  currentPmemUsage, pmemLimit,
                  pId, containerId, pTree);
              isMemoryOverLimit = true;
              containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
            }

            if (isMemoryOverLimit) {
              // Virtual or physical memory over limit. Fail the container and
              // remove
              // the corresponding process tree
              LOG.warn(msg);
              // warn if not a leader
              if (!pTree.checkPidPgrpidForMatch()) {
                LOG.error("Killed container process with PID " + pId
                    + " but it is not a process group leader.");
              }
              // kill the container
              eventDispatcher.getEventHandler().handle(
                  new ContainerKillEvent(containerId,
                      containerExitStatus, msg));
              // Stop tracking the container that was just asked to be killed.
              it.remove();
              LOG.info("Removed ProcessTree with root " + pId);
            } else {
              // Accounting the total memory in usage for all containers that
              // are still
              // alive and within limits.
              vmemStillInUsage += currentVmemUsage;
              pmemStillInUsage += currentPmemUsage;
            }
          } catch (Exception e) {
            // Log the exception and proceed to the next container.
            LOG.warn("Uncaught exception in ContainerMemoryManager "
                + "while managing memory of " + containerId, e);
          }
        }

        // Wait for the next monitoring round; an interrupt during the sleep
        // is the only way out of the loop.
        try {
          Thread.sleep(monitoringInterval);
        } catch (InterruptedException e) {
          LOG.warn(ContainersMonitorImpl.class.getName()
              + " is interrupted. Exiting.");
          break;
        }
      }
    }

    /**
     * Builds the message used when a container exceeds a memory limit: a
     * headline naming the pid, container and kind of memory, the current
     * usage figures, and a dump of the container's process tree.
     *
     * @param memTypeExceeded kind of memory named in the headline
     *        (callers in this file pass "virtual" or "physical")
     * @param currentVmemUsage current virtual memory usage
     * @param vmemLimit virtual memory limit
     * @param currentPmemUsage current physical memory usage
     * @param pmemLimit physical memory limit
     * @param pId pid shown in the headline
     * @param containerId container shown in the headline and dump title
     * @param pTree process tree whose dump is appended
     * @return the assembled multi-line message
     */
    private String formatErrorMessage(String memTypeExceeded,
        long currentVmemUsage, long vmemLimit,
        long currentPmemUsage, long pmemLimit,
        String pId, ContainerId containerId, ResourceCalculatorProcessTree pTree) {
      final StringBuilder report = new StringBuilder();
      // Headline.
      report.append(String.format(
          "Container [pid=%s,containerID=%s] is running beyond %s memory limits. ",
          pId, containerId, memTypeExceeded));
      // Usage figures.
      report.append("Current usage: ");
      report.append(formatUsageString(
          currentVmemUsage, vmemLimit, currentPmemUsage, pmemLimit));
      report.append(". Killing container.\n");
      // Process-tree dump.
      report.append("Dump of the process-tree for ");
      report.append(containerId);
      report.append(" :\n");
      report.append(pTree.getProcessTreeDump());
      return report.toString();
    }

    /**
     * Renders memory usage as
     * "&lt;used&gt;B of &lt;limit&gt;B physical memory used; &lt;used&gt;B of
     * &lt;limit&gt;B virtual memory used". Each figure is formatted with
     * {@code TraditionalBinaryPrefix.long2String(value, "", 1)}.
     *
     * Note the parameter order is virtual first, while the output lists
     * physical first.
     *
     * @param currentVmemUsage current virtual memory usage
     * @param vmemLimit virtual memory limit
     * @param currentPmemUsage current physical memory usage
     * @param pmemLimit physical memory limit
     * @return the formatted usage string
     */
    private String formatUsageString(long currentVmemUsage, long vmemLimit,
        long currentPmemUsage, long pmemLimit) {
      final String pmemUsed =
          TraditionalBinaryPrefix.long2String(currentPmemUsage, "", 1);
      final String pmemMax =
          TraditionalBinaryPrefix.long2String(pmemLimit, "", 1);
      final String vmemUsed =
          TraditionalBinaryPrefix.long2String(currentVmemUsage, "", 1);
      final String vmemMax =
          TraditionalBinaryPrefix.long2String(vmemLimit, "", 1);
      final String template = "%sB of %sB physical memory used; "
          + "%sB of %sB virtual memory used";
      return String.format(template, pmemUsed, pmemMax, vmemUsed, vmemMax);
    }
  }

WebServer

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值