ApplicationMaster 获取Container并执行的过程

ApplicationMaster 获取Container并执行的过程

添加任务到调度器

当RMAppImpl 收到RMAppEventType.APP_NEW_SAVED事件后,触发SchedulerEventType.APP_ADDED事件

// State-machine registration: in state NEW_SAVING, an APP_NEW_SAVED event moves the
// application to SUBMITTED and runs AddApplicationToSchedulerTransition.
.addTransition(RMAppState.NEW_SAVING, RMAppState.SUBMITTED,
    RMAppEventType.APP_NEW_SAVED, new AddApplicationToSchedulerTransition())
  /**
   * Transition that hands the application over to the scheduler by dispatching an
   * {@code AppAddedSchedulerEvent}.
   */
  private static final class AddApplicationToSchedulerTransition
      extends RMAppTransition {

    /**
     * Builds an {@code AppAddedSchedulerEvent} from the application's id, the queue and
     * reservation id found in its submission context, and its user, then passes the event
     * to the application's handler.
     *
     * @param app   the application being transitioned
     * @param event the triggering event (not read by this transition)
     */
    @Override
    public void transition(RMAppImpl app, RMAppEvent event) {
      AppAddedSchedulerEvent appAdded = new AppAddedSchedulerEvent(
          app.applicationId,
          app.submissionContext.getQueue(),
          app.user,
          app.submissionContext.getReservationID());
      app.handler.handle(appAdded);
    }
  }

在该transition方法中,发送了SchedulerEventType.APP_ADDED事件,而该事件在ResourceScheduler中注册。ResourceScheduler有多种实现,这里以FairScheduler为例:当FairScheduler收到RMApp发送的SchedulerEventType.APP_ADDED事件后,从事件中获取应用的相关信息(应用Id、队列名、用户名及预留Id,即ReservationID)

// org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler
  public void handle(SchedulerEvent event) {
      ......
      switch (event.getType()) {
      if (!(event instanceof AppAddedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
      String queueName =
          resolveReservationQueueName(appAddedEvent.getQueue(),
              appAddedEvent.getApplicationId(),
              appAddedEvent.getReservationID());
      if (queueName != null) {
        addApplication(appAddedEvent.getApplicationId(),
            queueName, appAddedEvent.getUser(),
            appAddedEvent.getIsAppRecovering());
      }
      break;
      ......
   }

然后在调度器中添加该应用,之后发送RMAppEventType.APP_ACCEPTED事件

// org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler
 /**
  * Adds an application to the scheduler (excerpt; several checks are elided).
  *
  * <p>Visible steps: look up the {@code RMApp}, assign it to a leaf queue (returning early
  * if no queue is assigned), record a new {@code SchedulerApplication} in
  * {@code applications}, update the queue metrics, log the acceptance, and, unless the
  * application is recovering, dispatch {@code RMAppEventType.APP_ACCEPTED}.
  *
  * @param applicationId   id of the application to add
  * @param queueName       requested queue name
  * @param user            submitting user
  * @param isAppRecovering when true, the APP_ACCEPTED notification is skipped
  */
 protected synchronized void addApplication(ApplicationId applicationId,
      String queueName, String user, boolean isAppRecovering) {

    // (Elided in this excerpt, per the original author's note:) when queueName is null,
    // empty, or starts/ends with '.', the application is refused by the scheduler,
    // RMAppEventType.APP_REJECTED is sent, and the method returns immediately.

    RMApp rmApp = rmContext.getRMApps().get(applicationId);
    // Per the original author's note: assigns the application to a queue according to the
    // queue placement policy, sets that queue on rmApp, and returns the queue.
    FSLeafQueue queue = assignToQueue(rmApp, queueName, user);
    if (queue == null) {
      // No queue was assigned; nothing more is done here.
      return;
    }

    // Enforce ACLs (code elided in this excerpt)

    // RBAC check (code elided in this excerpt)

    SchedulerApplication<FSAppAttempt> application =
        new SchedulerApplication<FSAppAttempt>(queue, user);
    // Register the application with the scheduler; the map is declared as
    // ConcurrentMap<ApplicationId, SchedulerApplication<T>> applications;
    applications.put(applicationId, application);
    queue.getMetrics().submitApp(user);

    LOG.info("Accepted application " + applicationId + " from user: " + user
            + ", in queue: " + queueName + "(assigned=" + queue.getName() + ")" + ", currently num of applications: "
            + applications.size());
    if (isAppRecovering) {
      // Recovering applications do not get an APP_ACCEPTED notification.
      if (LOG.isDebugEnabled()) {
        LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED");
      }
    } else {
      // Notify the RMApp that the scheduler has accepted it.
      rmContext.getDispatcher().getEventHandler()
        .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
    }
  }

创建RMAppAttempt

在RMApp中注册了RMAppEventType.APP_ACCEPTED事件,当收到该事件时,创建并启动一个新的RMAppAttempt:先通过createNewAttempt()创建RMAppAttempt,再发送RMAppStartAttemptEvent来启动它

// State-machine registration: in state SUBMITTED, an APP_ACCEPTED event moves the
// application to ACCEPTED and runs StartAppAttemptTransition.
.addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED,
    RMAppEventType.APP_ACCEPTED, new StartAppAttemptTransition())

创建RMAppAttempt,并将其添加到RMApp下的 attempts 集合中

// Attempts of this application keyed by attempt id; a LinkedHashMap iterates in insertion order.
Map<ApplicationAttemptId, RMAppAttempt> attempts = new LinkedHashMap<>();
 //org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl 
 /** Transition that creates and starts a new attempt for the application. */
 private static final class StartAppAttemptTransition
     extends RMAppTransition {

   /**
    * Asks the application to create and start a new attempt without transferring state
    * from a previous attempt.
    *
    * @param app   the application being transitioned
    * @param event the triggering event (not read by this transition)
    */
   @Override
   public void transition(RMAppImpl app, RMAppEvent event) {
     final boolean transferStateFromPreviousAttempt = false;
     app.createAndStartNewAttempt(transferStateFromPreviousAttempt);
   }
 }
 /**
  * Creates a new attempt and then dispatches the event that starts it.
  *
  * @param transferStateFromPreviousAttempt forwarded unchanged in the start event
  */
 private void createAndStartNewAttempt(boolean transferStateFromPreviousAttempt) {
   createNewAttempt();
   // The start event carries the id of the current attempt, not the attempt object.
   RMAppStartAttemptEvent startEvent = new RMAppStartAttemptEvent(
       currentAttempt.getAppAttemptId(), transferStateFromPreviousAttempt);
   handler.handle(startEvent);
 }
 /**
  * Builds the next attempt for this application, records it in {@code attempts} and makes
  * it the current attempt.
  */
 private void createNewAttempt() {
   // The new attempt's number is the count of existing attempts plus one.
   int nextAttemptNumber = attempts.size() + 1;
   ApplicationAttemptId appAttemptId =
       ApplicationAttemptId.newInstance(applicationId, nextAttemptNumber);

   // If (the number of previously failed attempts, which should not include preemption,
   // hardware errors or NM resync, plus one) equals the maximum attempt limit, the newly
   // created attempt may be the last one.
   boolean maybeLastAttempt = maxAppAttempts == (getNumFailedAppAttempts() + 1);

   RMAppAttempt attempt = new RMAppAttemptImpl(appAttemptId, rmContext, scheduler,
       masterService, submissionContext, conf, maybeLastAttempt, amReq);
   attempts.put(appAttemptId, attempt);
   currentAttempt = attempt;
 }

在RMAppAttempt中注册了该事件,

 // State-machine registration: in state NEW, a START event moves the attempt to SUBMITTED
 // and runs AttemptStartedTransition.
 .addTransition(RMAppAttemptState.NEW, RMAppAttemptState.SUBMITTED,
     RMAppAttemptEventType.START, new AttemptStartedTransition())

当RMAppAttempt收到RMAppAttemptEventType.START事件后,向ApplicationMasterService注册该RMAppAttempt,并发送SchedulerEventType.APP_ATTEMPT_ADDED事件,将其添加到调度器

  // org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl 	
  /**
   * Transition run when an attempt is started: records the start time, registers the
   * attempt with the ApplicationMasterService and announces it to the scheduler.
   */
  private static final class AttemptStartedTransition extends BaseTransition {

    /**
     * @param appAttempt the attempt being started
     * @param event      the triggering event; when it is an {@code RMAppStartAttemptEvent}
     *                   its transfer-state flag is forwarded to the scheduler
     */
    @Override
    public void transition(RMAppAttemptImpl appAttempt,
        RMAppAttemptEvent event) {
      // Only an RMAppStartAttemptEvent can request a state transfer from the previous
      // attempt; any other event means no transfer.
      final boolean transferState = event instanceof RMAppStartAttemptEvent
          && ((RMAppStartAttemptEvent) event).getTransferStateFromPreviousAttempt();

      appAttempt.startTime = System.currentTimeMillis();

      // Register with the ApplicationMasterService
      appAttempt.masterService.registerAppAttempt(
          appAttempt.applicationAttemptId);

      // UGI verification (code elided in this excerpt)

      // Add the attempt to the scheduler and tell it whether to transfer state from the
      // previous attempt.
      AppAttemptAddedSchedulerEvent attemptAdded = new AppAttemptAddedSchedulerEvent(
          appAttempt.applicationAttemptId, transferState);
      appAttempt.eventHandler.handle(attemptAdded);
    }
  }

这里先看将RMAppAttempt添加到调度器的流程,在FairScheduler中会处理该事件

 // org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler 
 public void handle(SchedulerEvent event) {
	case APP_ATTEMPT_ADDED:
      if (!(event instanceof AppAttemptAddedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
          (AppAttemptAddedSchedulerEvent) event;
      addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
        appAttemptAddedEvent.getTransferStateFromPreviousAttempt(),
        appAttemptAddedEvent.getIsAttemptRecovering());
      break;
  }

将新创建的AppAttempt添加到调度器:先判断这个AppAttempt是否可运行,如果可运行则将其加入可运行列表,并递增父队列和用户的可运行应用数,否则将其加入不可运行列表;然后如果该AppAttempt不是处于恢复过程中,则发送RMAppAttemptEventType.ATTEMPT_ADDED事件

  // org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler 
  /**
   * Adds a new application attempt to the scheduler.
   *
   * <p>Creates an {@code FSAppAttempt} for the application's queue and user, optionally
   * transfers state from the application's current attempt, makes the new attempt current,
   * adds it to the queue as runnable or non-runnable according to
   * {@code maxRunningEnforcer}, updates the queue metrics, and, unless the attempt is
   * recovering, dispatches {@code RMAppAttemptEventType.ATTEMPT_ADDED}.
   *
   * @param applicationAttemptId             id of the attempt to add
   * @param transferStateFromPreviousAttempt when true, state is transferred from the
   *                                         application's current attempt
   * @param isAttemptRecovering              when true, the ATTEMPT_ADDED notification is
   *                                         skipped
   */
  protected synchronized void addApplicationAttempt(
      ApplicationAttemptId applicationAttemptId,
      boolean transferStateFromPreviousAttempt,
      boolean isAttemptRecovering) {
    // Look up the scheduler-side application record by the attempt's application id.
    SchedulerApplication<FSAppAttempt> application =
        applications.get(applicationAttemptId.getApplicationId());
    String user = application.getUser();
    FSLeafQueue queue = (FSLeafQueue) application.getQueue();

    FSAppAttempt attempt =
        new FSAppAttempt(this, applicationAttemptId, user,
            queue, new ActiveUsersManager(getRootQueueMetrics()),
            rmContext);
    if (transferStateFromPreviousAttempt) {
      // The application's current attempt is still the previous one at this point.
      attempt.transferStateFromPreviousAttempt(application
          .getCurrentAppAttempt());
    }
    application.setCurrentAppAttempt(attempt);
	// Check whether running this application would exceed the maxRunningApps limit
    boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, user);
    // Add the attempt to the queue's runnable list if it is runnable, otherwise to the
    // non-runnable list
    queue.addApp(attempt, runnable);
    // Per the original author's note: for a runnable attempt, the runnable-app count of the
    // parent queues is increased recursively and usersNumRunnableApps is incremented
    if (runnable) {
      maxRunningEnforcer.trackRunnableApp(attempt);
    } else {
      maxRunningEnforcer.trackNonRunnableApp(attempt);
    }

    queue.getMetrics().submitAppAttempt(user);

    LOG.info("Added Application Attempt " + applicationAttemptId
        + " to scheduler from user: " + user);

    if (isAttemptRecovering) {
      // Recovering attempts do not get an ATTEMPT_ADDED notification.
      if (LOG.isDebugEnabled()) {
        LOG.debug(applicationAttemptId
            + " is recovering. Skipping notifying ATTEMPT_ADDED");
      }
    } else {
      // Notify the RMAppAttempt that the scheduler has added it.
      rmContext.getDispatcher().getEventHandler().handle(
        new RMAppAttemptEvent(applicationAttemptId,
            RMAppAttemptEventType.ATTEMPT_ADDED));
    }
  }

在RMAppAttemptImpl中注册了该事件,

// Transitions from SUBMITTED state
.addTransition(RMAppAttemptState.SUBMITTED, 
    EnumSet.of(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING,
               RMAppAttemptState.SCHEDULED	),
    RMAppAttemptEventType.ATTEMPT_ADDED,

向RM注册新的RMAppAttempt

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值