概述
submitApplication会经历一系列Event:
- start类型的RMAppEvent
- storeApp类型的RMStateStoreEvent
- newSaved类型的RMAppEvent
- appAdded类型的SchedulerEvent
- Accepted类型的RMAppEvent
在处理storeApp类型的RMStateStoreEvent过程中,存储app的信息到RMStateStore。
在处理appAdded类型的SchedulerEvent过程中,记录该app id和app到AbstractYarnScheduler的并发map集合中。
RMAppNewlySavingTransition处理start类型的RMAppEvent事件
如果yarn.resourcemanager.recovery.enabled配置参数为true,则通过异步的方式存储app的信息到RMStateStore。在RM重启时会从RMStateStore获取app的信息进行状态恢复。
/**
 * Transition that asks the state store to persist a newly submitted
 * application: it logs the application id and passes the app to
 * storeNewApplication() on the state store obtained from the RM context.
 */
private static final class RMAppNewlySavingTransition extends RMAppTransition {
@Override
public void transition(RMAppImpl app, RMAppEvent event) {
// NOTE(review): the line below is an elision marker in this excerpt; the
// omitted statements are not visible here.
........
// If recovery is enabled then store the application information in a
// non-blocking call so make sure that RM has stored the information
// needed to restart the AM after RM restart without further client
// communication
LOG.info("Storing application with id " + app.applicationId);
app.rmContext.getStateStore().storeNewApplication(app);
}
}
将storeApp类型的RMStateStoreEvent事件提交给RMStateStore内部的AsyncDispatcher(名为"RM StateStore dispatcher",并非RM的中央异步调度器)
将要存储的app的相关信息封装成ApplicationStateData。
创建storeApp类型的RMStateStoreEvent事件,并提交给RMStateStore内部的AsyncDispatcher(在serviceInit()中创建,名为"RM StateStore dispatcher")。
//RMStateStore.java
/**
 * Requests that the state of a newly submitted application be stored.
 *
 * <p>The submit time, start time, submission context, user and caller context
 * of {@code app} are captured in an {@code ApplicationStateData}, the
 * application timeouts are attached, and the result is wrapped in an
 * {@code RMStateStoreAppEvent} that is handed to the handler returned by
 * {@code getRMStateStoreEventHandler()}. The store is not written by this
 * method itself.
 *
 * @param app the application whose state should be stored
 */
@SuppressWarnings("unchecked")
public void storeNewApplication(RMApp app) {
  ApplicationSubmissionContext submissionContext =
      app.getApplicationSubmissionContext();
  assert submissionContext instanceof ApplicationSubmissionContextPBImpl;

  ApplicationStateData stateToStore = ApplicationStateData.newInstance(
      app.getSubmitTime(),
      app.getStartTime(),
      submissionContext,
      app.getUser(),
      app.getCallerContext());
  stateToStore.setApplicationTimeouts(app.getApplicationTimeouts());

  getRMStateStoreEventHandler().handle(new RMStateStoreAppEvent(stateToStore));
}
//RMStateStoreAppEvent.java
/**
 * Creates a {@code STORE_APP} state-store event carrying the given
 * application state.
 *
 * @param appState the application state to be stored
 */
public RMStateStoreAppEvent(ApplicationStateData appState) {
  super(RMStateStoreEventType.STORE_APP);
  this.appState = appState;
}
// Dispatcher owned by the state store; created as "RM StateStore dispatcher"
// in serviceInit().
AsyncDispatcher dispatcher;
// Handler that serviceInit() registers with the dispatcher for
// RMStateStoreEventType events.
@SuppressWarnings("rawtypes")
@VisibleForTesting
protected EventHandler rmStateStoreEventHandler;
/**
 * Creates and initializes the state store's own AsyncDispatcher and registers
 * a ForwardingEventHandler with it for RMStateStoreEventType events.
 *
 * @param conf configuration passed to the dispatcher's init()
 * @throws Exception declared by the signature; the visible code does not show
 *     which calls may throw
 */
@Override
protected void serviceInit(Configuration conf) throws Exception{
// create async handler
dispatcher = new AsyncDispatcher("RM StateStore dispatcher");
dispatcher.init(conf);
rmStateStoreEventHandler = new ForwardingEventHandler();
// Register ForwardingEventHandler with the AsyncDispatcher as the handler for RMStateStoreEventType events
dispatcher.register(RMStateStoreEventType.class,
rmStateStoreEventHandler);
// NOTE(review): the line below is an elision marker in this excerpt; the
// omitted statements are not visible here.
..........
}
/**
 * Returns the event handler of the state store's dispatcher; events passed to
 * it are routed through that dispatcher.
 *
 * @return the dispatcher's event handler
 */
@SuppressWarnings("rawtypes")
protected EventHandler getRMStateStoreEventHandler() {
  final EventHandler storeEventHandler = dispatcher.getEventHandler();
  return storeEventHandler;
}
AsyncDispatcher将RMStateStoreEvent分发给ForwardingEventHandler处理
ForwardingEventHandler是RMStateStore的内部类,它会调用RMStateStore#handleStoreEvent()方法处理RMStateStoreEvent。该方法会调用状态机处理相应类型的RMStateStoreEvent。
/**
 * Adapter that receives state-store events and passes each one straight to
 * {@code handleStoreEvent}, so the enclosing store does not have to expose an
 * {@code EventHandler} method on its own public interface.
 */
private final class ForwardingEventHandler
    implements EventHandler<RMStateStoreEvent> {

  /** Forwards {@code event} unchanged to {@code handleStoreEvent}. */
  @Override
  public void handle(RMStateStoreEvent event) {
    handleStoreEvent(event);
  }
}
// Dispatcher related code
/**
 * Feeds one state-store event into the state machine while holding the write
 * lock, and logs a state change if the store's state differs afterwards.
 * An invalid transition is logged as an error and otherwise ignored.
 *
 * @param event the state-store event to process
 */
protected void handleStoreEvent(RMStateStoreEvent event) {
  this.writeLock.lock();
  try {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Processing event of type " + event.getType());
    }

    final RMStateStoreState stateBefore = getRMStateStoreState();
    this.stateMachine.doTransition(event.getType(), event);
    // Read once: the write lock is still held, so the state cannot change
    // between the comparison and the log statement.
    final RMStateStoreState stateAfter = getRMStateStoreState();

    if (stateBefore != stateAfter) {
      LOG.info("RMStateStore state change from " + stateBefore + " to "
          + stateAfter);
    }
  } catch (InvalidStateTransitionException e) {
    LOG.error("Can't handle this event at current state", e);
  } finally {
    this.writeLock.unlock();
  }
}
ForwardingEventHandler委托给StoreAppTransition处理storeApp类型的RMStateStoreEvent
StoreAppTransition调用RMStateStore#storeApplicationStateInternal()方法存储app的信息做状态恢复用,然后创建newSaved类型的RMAppEvent,并提交给中央异步调度器AsyncDispatcher。
//RMStateStore.java
// Starting state is ACTIVE. A STORE_APP event received in ACTIVE is handled by
// StoreAppTransition and leaves the store in either ACTIVE or FENCED.
stateMachineFactory = new StateMachineFactory<RMStateStore,
RMStateStoreState,
RMStateStoreEventType,
RMStateStoreEvent>(
RMStateStoreState.ACTIVE)
.addTransition(RMStateStoreState.ACTIVE,
EnumSet.of(RMStateStoreState.ACTIVE, RMStateStoreState.FENCED),
RMStateStoreEventType.STORE_APP, new StoreAppTransition())
/**
 * Handles a {@code STORE_APP} event: stores the application state and then
 * notifies the application of the outcome.
 *
 * <ul>
 *   <li>On success an {@code APP_NEW_SAVED} event is sent to the app.</li>
 *   <li>On a {@code StoreLimitException} an {@code APP_SAVE_FAILED} event
 *       carrying the exception message is sent to the app.</li>
 *   <li>On any other exception the store's failure handling is invoked and
 *       its result decides the resulting state via {@code finalState}.</li>
 * </ul>
 */
private static class StoreAppTransition
    implements MultipleArcTransition<RMStateStore, RMStateStoreEvent,
        RMStateStoreState> {

  @Override
  public RMStateStoreState transition(RMStateStore store,
      RMStateStoreEvent event) {
    if (!(event instanceof RMStateStoreAppEvent)) {
      // should never happen
      LOG.error("Illegal event type: " + event.getClass());
      return RMStateStoreState.ACTIVE;
    }

    ApplicationStateData stateToStore =
        ((RMStateStoreAppEvent) event).getAppState();
    ApplicationId appId =
        stateToStore.getApplicationSubmissionContext().getApplicationId();
    LOG.info("Storing info for app: " + appId);

    try {
      store.storeApplicationStateInternal(appId, stateToStore);
      store.notifyApplication(
          new RMAppEvent(appId, RMAppEventType.APP_NEW_SAVED));
    } catch (Exception e) {
      LOG.error("Error storing app: " + appId, e);
      if (!(e instanceof StoreLimitException)) {
        // Generic store failure: the failure handler reports whether the
        // store is now fenced.
        return finalState(store.notifyStoreOperationFailedInternal(e));
      }
      store.notifyApplication(
          new RMAppEvent(appId, RMAppEventType.APP_SAVE_FAILED,
              e.getMessage()));
    }
    // Reached on success and on StoreLimitException: not fenced.
    return finalState(false);
  }
}
/**
 * Passes an application event to the event handler of {@code rmDispatcher},
 * which is how the state store reports the result of a store operation back
 * to the application.
 *
 * @param event app event containing the app id and event type
 */
private void notifyApplication(RMAppEvent event) {
  rmDispatcher.getEventHandler().handle(event);
}
AddApplicationToSchedulerTransition处理newSaved类型的RMAppEvent
中央异步调度器AsyncDispatcher将RMAppEvent分发给ApplicationEventDispatcher处理。
ApplicationEventDispatcher获取RMAppEvent对应的RMAppImpl,委托给RMAppImpl处理RMAppEvent。
RMAppImpl为appNewSaved类型的RMAppEvent注册的transition是AddApplicationToSchedulerTransition。
AddApplicationToSchedulerTransition创建appAdded类型的SchedulerEvent,提交给中央异步调度器AsyncDispatcher。
// NEW_SAVING -> SUBMITTED on APP_NEW_SAVED, handled by
// AddApplicationToSchedulerTransition.
.addTransition(RMAppState.NEW_SAVING, RMAppState.SUBMITTED,
RMAppEventType.APP_NEW_SAVED, new AddApplicationToSchedulerTransition())
/**
 * Transition that announces a saved application to the scheduler: it builds
 * an {@code AppAddedSchedulerEvent} from the app's user, submission context,
 * priority and placement context, hands it to the app's event handler, and
 * then sends the ATS create event.
 */
private static final class AddApplicationToSchedulerTransition extends
    RMAppTransition {

  @Override
  public void transition(RMAppImpl app, RMAppEvent event) {
    // The literal false is presumably the "is recovering" flag of the
    // event - TODO confirm against the constructor overload used here.
    AppAddedSchedulerEvent appAdded = new AppAddedSchedulerEvent(
        app.user,
        app.submissionContext,
        false,
        app.applicationPriority,
        app.placementContext);
    app.handler.handle(appAdded);

    // send the ATS create Event
    app.sendATSCreateEvent();
  }
}
/**
 * Creates an {@code APP_ADDED} scheduler event and records every argument in
 * the corresponding field.
 *
 * @param applicationId id of the application being added
 * @param queue queue name carried by the event
 * @param user user carried by the event
 * @param isAppRecovering value stored in the event's recovering flag
 * @param reservationID reservation id carried by the event
 * @param appPriority priority carried by the event
 * @param placementContext placement context carried by the event
 */
public AppAddedSchedulerEvent(ApplicationId applicationId, String queue,
    String user, boolean isAppRecovering, ReservationId reservationID,
    Priority appPriority, ApplicationPlacementContext placementContext) {
  super(SchedulerEventType.APP_ADDED);
  // Assignments follow the parameter order.
  this.applicationId = applicationId;
  this.queue = queue;
  this.user = user;
  this.isAppRecovering = isAppRecovering;
  this.reservationID = reservationID;
  this.appPriority = appPriority;
  this.placementContext = placementContext;
}
AsyncDispatcher分发appAdded类型的SchedulerEvent给schedulerDispatcher处理
ResourceManager的内部类RMActiveServices在serviceInit()中向rmDispatcher注册:SchedulerEventType类型的事件由schedulerDispatcher处理。
/**
 * Creates the scheduler, gives it the RM context, adds it via addIfService()
 * and publishes it on the RM context; then creates the scheduler event
 * dispatcher, adds it via addIfService() and registers it with rmDispatcher
 * as the handler for SchedulerEventType events.
 *
 * @param configuration service configuration (not read by the visible code)
 * @throws Exception declared by the signature; the visible code does not show
 *     which calls may throw
 */
protected void serviceInit(Configuration configuration) throws Exception {
scheduler = createScheduler();
scheduler.setRMContext(rmContext);
addIfService(scheduler);
rmContext.setScheduler(scheduler);
schedulerDispatcher = createSchedulerEventDispatcher();
addIfService(schedulerDispatcher);
// Route every SchedulerEventType event from rmDispatcher to schedulerDispatcher.
rmDispatcher.register(SchedulerEventType.class, schedulerDispatcher);
}
/**
 * Creates the dispatcher that queues scheduler events and delivers them to
 * {@code this.scheduler}.
 *
 * @return a new {@code EventDispatcher} named "SchedulerEventDispatcher"
 *     whose target handler is the scheduler
 */
protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() {
  // Diamond instead of a raw type, so the event type is checked at compile
  // time rather than relying on an unchecked conversion.
  return new EventDispatcher<>(this.scheduler, "SchedulerEventDispatcher");
}
EventDispatcher介绍
EventDispatcher类似于AsyncDispatcher,同样是基于生产者消费者模型,内部维护了一个队列EventQueue,一个消费者线程EventProcessor从队列中获取event。不同的是,在EventDispatcher中,处理event的处理器是特定的,在初始化EventDispatcher时就已经指定了EventHandler。在AsyncDispatcher中,event类型和EventHandler是多样的,内部有map集合维护event类型和EventHandler的映射关系。
例如schedulerDispatcher实例的事件处理器是Scheduler。
/**
 * A specialized {@link EventHandler} for services that are expected to accept
 * a large number of events without blocking the calling thread on event
 * processing. {@link #handle} only places the event on an internal
 * {@link BlockingQueue}; a single dedicated thread takes events from the
 * queue and passes them, one at a time, to the handler supplied at
 * construction.
 *
 * @param <T> Type of Event
 */
public class EventDispatcher<T extends Event> extends
    AbstractService implements EventHandler<T> {

  /** Target handler that actually processes each queued event. */
  private final EventHandler<T> handler;
  /** Queue between producers calling {@link #handle} and the processor thread. */
  private final BlockingQueue<T> eventQueue =
      new LinkedBlockingDeque<>();
  /** The single consumer thread; started in {@link #serviceStart()}. */
  private final Thread eventProcessor;
  /** Set by {@link #serviceStop()}; read by the processor thread. */
  private volatile boolean stopped = false;
  /**
   * Whether a handler failure terminates the JVM. Volatile because it is
   * written by {@link #disableExitOnError()} and read by the processor thread.
   */
  private volatile boolean shouldExitOnError = true;

  private static final Log LOG = LogFactory.getLog(EventDispatcher.class);

  /** Consumer loop: takes events from the queue and hands them to the handler. */
  private final class EventProcessor implements Runnable {
    @Override
    public void run() {
      T event;

      while (!stopped && !Thread.currentThread().isInterrupted()) {
        try {
          event = eventQueue.take();
        } catch (InterruptedException e) {
          LOG.error("Returning, interrupted : " + e);
          return; // TODO: Kill RM.
        }

        try {
          handler.handle(event);
        } catch (Throwable t) {
          // An error occurred, but we are shutting down anyway.
          // If it was an InterruptedException, the very act of
          // shutdown could have caused it and is probably harmless.
          if (stopped) {
            LOG.warn("Exception during shutdown: ", t);
            break;
          }
          LOG.fatal("Error in handling event type " + event.getType()
              + " to the Event Dispatcher", t);
          if (shouldExitOnError
              && !ShutdownHookManager.get().isShutdownInProgress()) {
            LOG.info("Exiting, bbye..");
            System.exit(-1);
          }
        }
      }
    }
  }

  /**
   * Creates a dispatcher that delivers every queued event to {@code handler}.
   * The processor thread is created here but not started until
   * {@link #serviceStart()}.
   *
   * @param handler the handler that processes each event
   * @param name the service name, also used to name the processor thread
   */
  public EventDispatcher(EventHandler<T> handler, String name) {
    super(name);
    this.handler = handler;
    this.eventProcessor = new Thread(new EventProcessor());
    this.eventProcessor.setName(getName() + ":Event Processor");
  }

  /** Starts the processor thread, then the service itself. */
  @Override
  protected void serviceStart() throws Exception {
    this.eventProcessor.start();
    super.serviceStart();
  }

  /**
   * Marks the dispatcher as stopped, interrupts the processor thread and
   * waits for it to finish.
   *
   * @throws YarnRuntimeException if the calling thread is interrupted while
   *     waiting; the thread's interrupt status is restored first
   */
  @Override
  protected void serviceStop() throws Exception {
    this.stopped = true;
    this.eventProcessor.interrupt();
    try {
      this.eventProcessor.join();
    } catch (InterruptedException e) {
      // Restore the interrupt status so code further up the stack can still
      // observe that this thread was interrupted.
      Thread.currentThread().interrupt();
      throw new YarnRuntimeException(e);
    }
    super.serviceStop();
  }

  /**
   * Enqueues {@code event} for the processor thread. Logs the queue size at
   * every multiple of 1000 and warns when the remaining capacity drops below
   * 1000. If the calling thread is interrupted while enqueuing, the event is
   * not queued and the thread's interrupt status is restored.
   *
   * @param event the event to enqueue
   */
  @Override
  public void handle(T event) {
    try {
      int qSize = eventQueue.size();
      if (qSize != 0 && qSize % 1000 == 0) {
        LOG.info("Size of " + getName() + " event-queue is " + qSize);
      }
      int remCapacity = eventQueue.remainingCapacity();
      if (remCapacity < 1000) {
        LOG.info("Very low remaining capacity on " + getName()
            + " event queue: " + remCapacity);
      }
      this.eventQueue.put(event);
    } catch (InterruptedException e) {
      LOG.info("Interrupted. Trying to exit gracefully.");
      // Do not swallow the interrupt: re-assert it so the caller's own loop
      // can see it and actually exit.
      Thread.currentThread().interrupt();
    }
  }

  /** Test hook: keep the JVM alive when the handler throws. */
  @VisibleForTesting
  public void disableExitOnError() {
    shouldExitOnError = false;
  }
}
FifoScheduler处理appAdded类型的SchedulerEvent
假设在ResourceManager中,createScheduler()方法创建的是FifoScheduler。
FifoScheduler会记录该app id和app到Scheduler的并发map集合中,然后创建Accepted类型的RMAppEvent,并提交给中央异步调度器AsyncDispatcher。
/**
 * Dispatches a scheduler event according to its type. For APP_ADDED the event
 * is cast to AppAddedSchedulerEvent and its id, queue, user and recovering
 * flag are passed to addApplication(); for APP_ATTEMPT_ADDED the event is cast
 * to AppAttemptAddedSchedulerEvent and passed on to addApplicationAttempt().
 * NOTE(review): the remaining cases are elided in this excerpt.
 */
@Override
public void handle(SchedulerEvent event) {
switch(event.getType()) {
case APP_ADDED:
{
AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
// addApplication() records the application id and its application object in
// the scheduler's applications map
addApplication(appAddedEvent.getApplicationId(),
appAddedEvent.getQueue(), appAddedEvent.getUser(),
appAddedEvent.getIsAppRecovering());
}
break;
case APP_ATTEMPT_ADDED:
{
AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
(AppAttemptAddedSchedulerEvent) event;
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
appAttemptAddedEvent.getTransferStateFromPreviousAttempt(),
appAttemptAddedEvent.getIsAttemptRecovering());
}
break;
..................
}
/**
 * Registers a new application with the scheduler and, unless the application
 * is being recovered, notifies it with an {@code APP_ACCEPTED} event.
 *
 * @param applicationId id under which the application is recorded
 * @param queue requested queue; not used by this method, which always
 *     creates the application with {@code DEFAULT_QUEUE}
 * @param user user the application is recorded and counted for
 * @param isAppRecovering when true, the {@code APP_ACCEPTED} notification is
 *     skipped
 */
public synchronized void addApplication(ApplicationId applicationId,
    String queue, String user, boolean isAppRecovering) {
  // Record the application id and its scheduler-side application object in
  // the applications map.
  SchedulerApplication<FifoAppAttempt> schedulerApp =
      new SchedulerApplication<>(DEFAULT_QUEUE, user);
  applications.put(applicationId, schedulerApp);
  metrics.submitApp(user);
  LOG.info("Accepted application " + applicationId + " from user: " + user
      + ", currently num of applications: " + applications.size());

  if (!isAppRecovering) {
    rmContext.getDispatcher().getEventHandler()
        .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
    return;
  }

  // Recovering application: no APP_ACCEPTED notification is sent.
  if (LOG.isDebugEnabled()) {
    LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED");
  }
}