Hadoop2.8.5 作业的受理

上一篇中,作业的提交最终流转到了 RMAppManager 手中。RM 节点上的 ClientRMService 对象相当于接待站,而 RMAppManager 对象则专门管理与作业的申请和运行相关的事务。两个对象均由 ResourceManager 创建,都在同一个 JVM 上。ClientRMService 是通过调用 rmAppManager.submitApplication() 把作业申请交到 RMAppManager 手里的。我们接着从这里切入。

1. 作业状态调度

hadoop-yarn-server-resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\RMAppManager.java

/**
 * Accepts a submitted application: creates its RMAppImpl, then either hands
 * the credentials to the delegation-token renewer (security enabled) or
 * directly fires the START event that sets the application state machine
 * in motion.
 *
 * @param submissionContext the client's submission context
 * @param submitTime the submission timestamp passed on to the new RMAppImpl
 * @param user the submitting user
 * @throws YarnException if the credential/start step fails; the failure is
 *         first reported to the application as APP_REJECTED and then
 *         rethrown via RPCUtil.getRemoteException
 */
protected void submitApplication(
    ApplicationSubmissionContext submissionContext, long submitTime,
    String user) throws YarnException {
  final ApplicationId applicationId = submissionContext.getApplicationId();

  // Step 1: create the RMAppImpl for this submission.
  final RMAppImpl rmApp =
      createAndPopulateNewRMApp(submissionContext, submitTime, user, false);

  // Step 2: security handling, or an immediate START when security is off.
  try {
    final Credentials parsedCredentials = parseCredentials(submissionContext);
    if (!UserGroupInformation.isSecurityEnabled()) {
      // Dispatcher is not yet started at this time, so these START events
      // enqueued should be guaranteed to be first processed when dispatcher
      // gets started.
      // This event drives the RMAppImpl state machine into action.
      final RMAppEvent startEvent =
          new RMAppEvent(applicationId, RMAppEventType.START);
      this.rmContext.getDispatcher().getEventHandler().handle(startEvent);
    } else {
      this.rmContext.getDelegationTokenRenewer().addApplicationAsync(
          applicationId,
          parsedCredentials,
          submissionContext.getCancelTokensWhenComplete(),
          rmApp.getUser());
    }
  } catch (Exception e) {
    // Something went wrong: reject the application and propagate the error.
    assert rmApp.getState() == RMAppState.NEW;
    final RMAppEvent rejection = new RMAppEvent(
        applicationId, RMAppEventType.APP_REJECTED, e.getMessage());
    this.rmContext.getDispatcher().getEventHandler().handle(rejection);
    throw RPCUtil.getRemoteException(e);
  }
}

hadoop-yarn-server-resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\ResourceManager.java
由ApplicationEventDispatcher 转发给RMAppImpl处理

// Excerpt of ResourceManager showing how RMAppEvents reach the application:
// a handler is registered for RMAppEventType, and that handler forwards each
// event to the RMApp it is addressed to.
public class ResourceManager extends CompositeService implements Recoverable {
  private Dispatcher rmDispatcher; // AsyncDispatcher
  // Register the handler that receives events of type RMAppEventType
  rmDispatcher.register(RMAppEventType.class, new ApplicationEventDispatcher(rmContext));
  // Named "Dispatcher", but it is actually an EventHandler
  public static final class ApplicationEventDispatcher implements EventHandler<RMAppEvent> {
    private final RMContext rmContext;
    public ApplicationEventDispatcher(RMContext rmContext) {
      this.rmContext = rmContext;
    }
    @Override
    public void handle(RMAppEvent event) {
      // Look up the target application by the id carried in the event.
      ApplicationId appID = event.getApplicationId();
      RMApp rmApp = this.rmContext.getRMApps().get(appID);
      // An event whose application is not in the context is ignored.
      if (rmApp != null) {
        try {
          rmApp.handle(event); // hand over to the concrete RMAppImpl
        } catch (Throwable t) {
          ......
        }
      }
    }
  }
}

hadoop-yarn-server-resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\rmapp\RMAppImpl.java
事件RMAppEventType.START的实际处理者


// An RMApp is itself an EventHandler for RMAppEvent, so events can be
// delivered to it through handle(RMAppEvent).
public interface RMApp extends EventHandler<RMAppEvent> {}

// Excerpt of RMAppImpl: the START transition rule, the event entry point that
// drives the state machine, and the transition that persists a new application.
public class RMAppImpl implements RMApp, Recoverable {
  // State-machine rule: on START, run RMAppNewlySavingTransition and move
  // from NEW to NEW_SAVING
 .addTransition(RMAppState.NEW, RMAppState.NEW_SAVING,
        RMAppEventType.START, new RMAppNewlySavingTransition())
  
  // Entry point for every RMAppEvent; the whole transition runs under the
  // write lock, which is always released in the finally block.
  @Override
  public void handle(RMAppEvent event) {
    this.writeLock.lock();
    try {
      ApplicationId appID = event.getApplicationId();
      // Remember the state before the transition for invalid-transition reporting.
      final RMAppState oldState = getState();
      try {
        // Drive the state machine
        this.stateMachine.doTransition(event.getType(), event);
      } catch (InvalidStateTransitionException e) {
        ......
        onInvalidStateTransition(event.getType(), oldState);
      }
    } finally {
      this.writeLock.unlock();
    }
  }
  
  // The actual work performed for the NEW -> NEW_SAVING transition
  private static final class RMAppNewlySavingTransition extends RMAppTransition {
    @Override
    public void transition(RMAppImpl app, RMAppEvent event) {
      app.rmContext.getStateStore().storeNewApplication(app); // persist the application info through the RMStateStore
    }
  }

}

resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\recovery\RMStateStore.java
//保存作业状态信息, 派生类有 FileSystemRMStateStore 、 MemoryRMStateStore 、 NullRMStateStore 以及 ZKRMStateStore

public abstract class RMStateStore extends AbstractService {

  /**
   * Snapshots a newly submitted application into an ApplicationStateData and
   * hands it to the store's dispatcher wrapped in an RMStateStoreAppEvent.
   *
   * @param app the application whose submission data is to be stored
   */
  public void storeNewApplication(RMApp app) {
    final ApplicationSubmissionContext submissionContext =
        app.getApplicationSubmissionContext();
    assert submissionContext instanceof ApplicationSubmissionContextPBImpl;

    final ApplicationStateData snapshot = ApplicationStateData.newInstance(
        app.getSubmitTime(),
        app.getStartTime(),
        submissionContext,
        app.getUser(),
        app.getCallerContext());

    // Per the original note, this event carries the type
    // RMStateStoreEventType.STORE_APP.
    final RMStateStoreAppEvent storeEvent = new RMStateStoreAppEvent(snapshot);
    dispatcher.getEventHandler().handle(storeEvent);
  }
}

对应的事务处理

private static class StoreAppTransition
      implements MultipleArcTransition<RMStateStore, RMStateStoreEvent,
          RMStateStoreState> {
    @Override
    public RMStateStoreState transition(RMStateStore store,
        RMStateStoreEvent event) {
      boolean isFenced = false;
      ApplicationStateData appState = ((RMStateStoreAppEvent) event).getAppState();
      ApplicationId appId = appState.getApplicationSubmissionContext().getApplicationId();
      try {
        store.storeApplicationStateInternal(appId, appState);
        //
        store.notifyApplication(new RMAppEvent(appId, RMAppEventType.APP_NEW_SAVED));
      }
      return finalState(isFenced);
    };
  }

hadoop-yarn-server-resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\rmapp\RMAppImpl.java
APP_NEW_SAVED 事件将驱动状态机从当前的 NEW_SAVING 状态变为 SUBMITTED


 .addTransition(RMAppState.NEW_SAVING, RMAppState.SUBMITTED,
        RMAppEventType.APP_NEW_SAVED, new AddApplicationToSchedulerTransition())

  private static final class AddApplicationToSchedulerTransition
      extends RMAppTransition {

    /**
     * Announces the application to the scheduler and then sends the ATS
     * create event.
     */
    @Override
    public void transition(RMAppImpl app, RMAppEvent event) {
      // Build and send the scheduling event for this application.
      final AppAddedSchedulerEvent schedulerEvent =
          new AppAddedSchedulerEvent(app.user, app.submissionContext, false);
      app.handler.handle(schedulerEvent);

      // send the ATS create Event
      app.sendATSCreateEvent();
    }
  }
  

AppAddedSchedulerEvent事件的事件类型

public class AppAddedSchedulerEvent extends SchedulerEvent {

  /**
   * Creates a scheduler event of type APP_ADDED describing one application.
   *
   * @param applicationId id of the application being added
   * @param queue queue named in the submission
   * @param user submitting user
   * @param isAppRecovering whether the application is being recovered
   * @param reservationID reservation associated with the application
   * @param appPriority priority of the application
   */
  public AppAddedSchedulerEvent(ApplicationId applicationId, String queue,
      String user, boolean isAppRecovering, ReservationId reservationID,
      Priority appPriority) {
    // The event type is fixed for this class.
    super(SchedulerEventType.APP_ADDED);

    // Who and what is being added.
    this.applicationId = applicationId;
    this.user = user;
    this.queue = queue;

    // Remaining submission attributes.
    this.appPriority = appPriority;
    this.reservationID = reservationID;
    this.isAppRecovering = isAppRecovering;
  }
}

resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\ResourceManager.java
调度转发器 SchedulerEventDispatcher

 @Private
  // Forwards SchedulerEvents to the scheduler through a queue: handle() only
  // enqueues the event, and a dedicated thread takes events off the queue and
  // passes them to scheduler.handle().
  public static class SchedulerEventDispatcher extends AbstractService
      implements EventHandler<SchedulerEvent> {
    // The resource scheduler that processes the events
    private final ResourceScheduler scheduler;
    // Queue of scheduler events waiting to be processed
    private final BlockingQueue<SchedulerEvent> eventQueue = new LinkedBlockingQueue<SchedulerEvent>();
    // Last queue size that was logged, so the same size is not logged twice in a row
    private volatile int lastEventQueueSizeLogged = 0;
    // Thread that processes the queued events
    private final Thread eventProcessor;
    // Set by serviceStop() to end the processing loop
    private volatile boolean stopped = false;
    // Read from configuration in serviceInit()
    private boolean shouldExitOnError = false;

    public SchedulerEventDispatcher(ResourceScheduler scheduler) {
      super(SchedulerEventDispatcher.class.getName());
      this.scheduler = scheduler;
      // The thread is only created here; it is started in serviceStart().
      this.eventProcessor = new Thread(new EventProcessor());
      this.eventProcessor.setName("ResourceManager Event Processor");
    }

    @Override
    protected void serviceInit(Configuration conf) throws Exception {
      this.shouldExitOnError =
          conf.getBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY,
            Dispatcher.DEFAULT_DISPATCHER_EXIT_ON_ERROR);
      super.serviceInit(conf);
    }

    @Override
    protected void serviceStart() throws Exception {
      this.eventProcessor.start();
      super.serviceStart();
    }

    // Loop body of the processing thread: take an event, let the scheduler
    // handle it, repeat until stopped or interrupted.
    private final class EventProcessor implements Runnable {
      @Override
      public void run() {

        SchedulerEvent event;

        while (!stopped && !Thread.currentThread().isInterrupted()) {
          try {
            event = eventQueue.take(); // take one event off the queue
          } catch (InterruptedException e) {
            LOG.error("Returning, interrupted : " + e);
            return; // TODO: Kill RM.
          }

          try {
            scheduler.handle(event); // let the scheduler process it
          } catch (Throwable t) {
            ......
          }
        }
      }
    }

    @Override
    protected void serviceStop() throws Exception {
      // Signal the loop to end, interrupt a possibly blocked take(), then
      // wait for the processing thread to finish.
      this.stopped = true;
      this.eventProcessor.interrupt();
      try {
        this.eventProcessor.join();
      } catch (InterruptedException e) {
        throw new YarnRuntimeException(e);
      }
      super.serviceStop();
    }

    @Override
    public void handle(SchedulerEvent event) {
      try {
        // Log the backlog each time it reaches a new non-zero multiple of 1000.
        int qSize = eventQueue.size();
        if (qSize != 0 && qSize % 1000 == 0
            && lastEventQueueSizeLogged != qSize) {
          lastEventQueueSizeLogged = qSize;
          LOG.info("Size of scheduler event-queue is " + qSize);
        }
        // Warn when fewer than 1000 free slots remain in the queue.
        int remCapacity = eventQueue.remainingCapacity();
        if (remCapacity < 1000) {
          LOG.info("Very low remaining capacity on scheduler event queue: "
              + remCapacity);
        }
        this.eventQueue.put(event); // enqueue the event
      } catch (InterruptedException e) {
        // NOTE(review): the interruption is only logged here; the event is
        // not enqueued and the interrupt flag is not restored.
        LOG.info("Interrupted. Trying to exit gracefully.");
      }
    }
  }

以FifoScheduler为例,来考察

// Excerpt of FifoScheduler: the event entry point and the handling of
// APP_ADDED. Bodies of the other cases are elided ("......") as in the article.
public class FifoScheduler extends AbstractYarnScheduler<FiCaSchedulerApp, FiCaSchedulerNode> implements  Configurable {
  /**
   * Dispatches a scheduler event by its type.
   *
   * @param event the event taken off the scheduler event queue
   */
  @Override
  public void handle(SchedulerEvent event) {
    switch(event.getType()) {
    case NODE_ADDED:
    ......
    break;
    case NODE_REMOVED:
    ......
    break;
    case NODE_RESOURCE_UPDATE:
    ......
    break;
    case NODE_UPDATE:
    ......
    break;
    case APP_ADDED:
    {
      AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
      addApplication(appAddedEvent.getApplicationId(), appAddedEvent.getQueue(), appAddedEvent.getUser(),
        appAddedEvent.getIsAppRecovering());
    }
    break;
    } // end of switch
  } // end of handle(); without this brace addApplication would nest inside it

  /**
   * Registers a new application with the scheduler and, unless it is being
   * recovered, reports APP_ACCEPTED back to the application.
   *
   * @param applicationId id of the application to add
   * @param queue queue named in the event; not consulted in this excerpt,
   *        the application is always created with DEFAULT_QUEUE
   * @param user submitting user
   * @param isAppRecovering whether the application is being recovered
   */
  @VisibleForTesting
  public synchronized void addApplication(ApplicationId applicationId,
      String queue, String user, boolean isAppRecovering) {
    SchedulerApplication<FiCaSchedulerApp> application = new SchedulerApplication<FiCaSchedulerApp>(DEFAULT_QUEUE, user);
    applications.put(applicationId, application);
    metrics.submitApp(user);
    if (isAppRecovering) {
       ......
    } else {
      // Newly submitted application: tell RMAppImpl it has been accepted.
      rmContext.getDispatcher().getEventHandler()
        .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
    }
  }
}

2. 作业发起运行调度

resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\rmapp\RMAppImpl.java

  .addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED,
          RMAppEventType.APP_ACCEPTED, new StartAppAttemptTransition())
  //开始一次启动运行的尝试
  private static final class StartAppAttemptTransition extends RMAppTransition {

    /**
     * Starts a new attempt for the application without transferring state
     * from a previous attempt.
     */
    @Override
    public void transition(RMAppImpl app, RMAppEvent event) {
      final boolean transferStateFromPreviousAttempt = false;
      app.createAndStartNewAttempt(transferStateFromPreviousAttempt);
    }
  }
  //发起事件
  /**
   * Creates a new attempt and sends it the start event.
   *
   * @param transferStateFromPreviousAttempt flag forwarded in the start event
   */
  private void createAndStartNewAttempt(boolean transferStateFromPreviousAttempt) {
    // Per the original note, this creates a new RMAppAttemptImpl and makes it
    // currentAttempt; it must run before currentAttempt is read below.
    createNewAttempt();

    final RMAppStartAttemptEvent startAttempt = new RMAppStartAttemptEvent(
        currentAttempt.getAppAttemptId(), transferStateFromPreviousAttempt);
    handler.handle(startAttempt);
  }

resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\ResourceManager.java
Attempt 事件转发

 @Private
  // Forwards an RMAppAttemptEvent to the attempt it is addressed to: first the
  // owning application is looked up, then the attempt inside that application.
  public static final class ApplicationAttemptEventDispatcher implements
      EventHandler<RMAppAttemptEvent> {
    private final RMContext rmContext;
    public ApplicationAttemptEventDispatcher(RMContext rmContext) {
      this.rmContext = rmContext;
    }
    @Override
    public void handle(RMAppAttemptEvent event) {
      ApplicationAttemptId appAttemptID = event.getApplicationAttemptId();
      // Despite its name, appAttemptId holds the ApplicationId of the owning
      // application, not an attempt id.
      ApplicationId appAttemptId = appAttemptID.getApplicationId();
      RMApp rmApp = this.rmContext.getRMApps().get(appAttemptId);
      // Events for an unknown application or unknown attempt are ignored.
      if (rmApp != null) {
        RMAppAttempt rmAppAttempt = rmApp.getRMAppAttempt(appAttemptID);
        if (rmAppAttempt != null) {
          try {
            rmAppAttempt.handle(event);
          } catch (Throwable t) {
            ......
          }
        }
      }
    }
  }

驱动状态机

public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {  
    //跳变规则
    addTransition ( RMAppAttemptState.NEW , RMAppAttemptState.SUBMITTED ,
                   RMAppAttemptEventType. START , newAttemptStartedTransition ())
    public void handle(RMAppAttemptEvent event) {
    	this.stateMachine.doTransition(event.getType(), event);
    }
}

尝试事件调度

  private static final class AttemptStartedTransition extends BaseTransition {
    /**
     * Handles the start of an attempt: registers it with the
     * ApplicationMasterService, creates the client-to-AM master key when
     * security is enabled, and announces the attempt to the scheduler.
     *
     * @param appAttempt the attempt being started
     * @param event the triggering event; when it is an RMAppStartAttemptEvent
     *        it supplies the transfer-state flag
     */
    @Override
    public void transition(RMAppAttemptImpl appAttempt,
        RMAppAttemptEvent event) {
      // The flag travels inside RMAppStartAttemptEvent; any other event type
      // means "do not transfer state".
      boolean transferStateFromPreviousAttempt = false;
      if (event instanceof RMAppStartAttemptEvent) {
        transferStateFromPreviousAttempt =
            ((RMAppStartAttemptEvent) event)
                .getTransferStateFromPreviousAttempt();
      }

      // Register with the ApplicationMasterService
      appAttempt.masterService
          .registerAppAttempt(appAttempt.applicationAttemptId);

      if (UserGroupInformation.isSecurityEnabled()) {
        appAttempt.clientTokenMasterKey =
            appAttempt.rmContext.getClientToAMTokenSecretManager()
              .createMasterKey(appAttempt.applicationAttemptId);
      }

      // Add the applicationAttempt to the scheduler and inform the scheduler
      // whether to transfer the state from previous attempt.
      // Per the original note, the event type is
      // SchedulerEventType.APP_ATTEMPT_ADDED and it is handled by the
      // scheduler (FifoScheduler in this walkthrough).
      appAttempt.eventHandler.handle(new AppAttemptAddedSchedulerEvent(
        appAttempt.applicationAttemptId, transferStateFromPreviousAttempt));
    }
  }

resourcemanager\src\main\java\org\apache\hadoop\yarn\server\resourcemanager\scheduler\fifo\FifoScheduler.java

 public synchronized void
      addApplicationAttempt(ApplicationAttemptId appAttemptId,
          boolean transferStateFromPreviousAttempt,
          boolean isAttemptRecovering) {
    SchedulerApplication<FiCaSchedulerApp> application =
        applications.get(appAttemptId.getApplicationId());
    String user = application.getUser();
    // TODO: Fix store
    FiCaSchedulerApp schedulerApp =
        new FiCaSchedulerApp(appAttemptId, user, DEFAULT_QUEUE,
          activeUsersManager, this.rmContext);
    if (transferStateFromPreviousAttempt) {
      schedulerApp.transferStateFromPreviousAttempt(application
        .getCurrentAppAttempt());
    }
    application.setCurrentAppAttempt(schedulerApp);
    metrics.submitAppAttempt(user);
    } else {
    //新加入的应用
      rmContext.getDispatcher().getEventHandler().handle(
        new RMAppAttemptEvent(appAttemptId,
            RMAppAttemptEventType.ATTEMPT_ADDED));
    }
  }

hadoop\yarn\server\resourcemanager\rmapp\attempt\RMAppAttemptImpl.java

//跳变规则
.addTransition(RMAppAttemptState.SUBMITTED, 
          EnumSet.of(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING,
                     RMAppAttemptState.SCHEDULED),
          RMAppAttemptEventType.ATTEMPT_ADDED,
          new ScheduleTransition())
public static final class ScheduleTransition implements
      MultipleArcTransition<RMAppAttemptImpl, RMAppAttemptEvent, RMAppAttemptState> {

    /**
     * Decides the attempt's next state after ATTEMPT_ADDED. For an unmanaged
     * AM the attempt is stored and moves to LAUNCHED_UNMANAGED_SAVING;
     * otherwise a single-container request for the AM is submitted to the
     * scheduler and the attempt moves to SCHEDULED.
     *
     * @param appAttempt the attempt being scheduled
     * @param event the triggering event
     * @return the new state of the attempt's state machine
     */
    @Override
    public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
        RMAppAttemptEvent event) {
      final ApplicationSubmissionContext submission = appAttempt.submissionContext;

      if (submission.getUnmanagedAM()) {
        // save state and then go to LAUNCHED state
        appAttempt.storeAttempt();
        return RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING;
      }

      // Managed AM: request exactly one container, which will host the AM.
      appAttempt.amReq.setNumContainers(1);
      appAttempt.amReq.setPriority(AM_CONTAINER_PRIORITY);
      appAttempt.amReq.setResourceName(ResourceRequest.ANY);
      appAttempt.amReq.setRelaxLocality(true);

      appAttempt.getAMBlacklistManager().refreshNodeHostCount(
          appAttempt.scheduler.getNumClusterNodes());

      final ResourceBlacklistRequest blacklistUpdates =
          appAttempt.getAMBlacklistManager().getBlacklistUpdates();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Using blacklist for AM: additions(" +
            blacklistUpdates.getBlacklistAdditions() + ") and removals(" +
            blacklistUpdates.getBlacklistRemovals() + ")");
      }

      // Submit the AM container request to the scheduler.
      final Allocation amAllocation = appAttempt.scheduler.allocate(
          appAttempt.applicationAttemptId,
          Collections.singletonList(appAttempt.amReq),
          EMPTY_CONTAINER_RELEASE_LIST,
          blacklistUpdates.getBlacklistAdditions(),
          blacklistUpdates.getBlacklistRemovals(),
          null,
          null);

      // The call is asserted to hand back no containers.
      if (amAllocation != null && amAllocation.getContainers() != null) {
        assert amAllocation.getContainers().isEmpty();
      }
      return RMAppAttemptState.SCHEDULED;
    }
  }

hadoop\yarn\server\resourcemanager\scheduler\fifo\FifoScheduler.java
开始分配容器

// Records an attempt's resource requests, container releases and blacklist
// changes, and returns an Allocation built from what the attempt object
// currently reports. In this excerpt increaseRequests and decreaseRequests
// are not used.
@Override
  public Allocation allocate(ApplicationAttemptId applicationAttemptId,
      List<ResourceRequest> ask, List<ContainerId> release,
      List<String> blacklistAdditions, List<String> blacklistRemovals,
      List<UpdateContainerRequest> increaseRequests,
      List<UpdateContainerRequest> decreaseRequests) {
      // The scheduler-side application attempt that is asking for resources
      // NOTE(review): no null check is visible here before it is used below.
    FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId);
    // Sanity-check and normalize the resource requests
    SchedulerUtils.normalizeRequests(ask, resourceCalculator, 
        clusterResource, minimumAllocation, getMaximumResourceCapability());
    // Release the containers the caller asked to release
    releaseContainers(release, application);
    synchronized (application) {
      if (!ask.isEmpty()) { // there is at least one resource request
        // showRequests() is called before and after the update.
        application.showRequests();
        // Update application requests
        application.updateResourceRequests(ask);
        application.showRequests();
      }
      // Update the blacklist
      application.updateBlacklist(blacklistAdditions, blacklistRemovals);
      Resource headroom = application.getHeadroom();
      application.setApplicationHeadroomForMetrics(headroom);
      // Build the Allocation returned to the caller from the attempt's newly
      // allocated containers, its headroom and its updated NM tokens
      return new Allocation(application.pullNewlyAllocatedContainers(),
          headroom, null, null, null, application.pullUpdatedNMTokens());
    }
  }

RM 管理着事件的转发,本篇主要考察了 RMAppImpl、RMAppAttemptImpl、FifoScheduler、RMStateStore 几者之间的状态机事件转发,最终走到 FifoScheduler 的容器分配。关于容器的分配是一个大主题,下次再考察。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值