YARN主节点RM受理用户提交的作业之后,创建AM并分配资源,然后密切注视着作业的进展。YARN的代码广泛使用状态机(StateMachine),来调度处理走走停停的粗粒度作业流程。在此“有限状态机”模型中,一个宏观的过程被抽象成一台机器,其结构包括一组“状态”、一组触发规则和一组操作。从程序设计的角度看,定义一组状态和事件可以使用枚举(enum)类型,跳转规则使用一个表,或者一个结构数组,表的每一行代表一条规则。如果定义了M种状态和N种事件,那么理论上这个表就应该有 M × N 行,其中每一行应该有四个字段:当前状态、(到来的)事件类型、下一状态和所需的操作处理。之后使用Dispatcher实现操作和驱动。
Hadoop 的代码中,具体的状态机,特别是其跳转表,都是由程序动态生成的,而不是静态预定的。用来生成状态机及其跳转表的 “状态机工厂”具体类为 StateMachineFactory。 Hadoop 系统中可能有很多个作业,每个作业都得有自己的状态机,这些状态机有相同的跳转表(因而有相同的状态集合、相同的触发事件集合和操作集合),但是每个状态机所处的状态可能不同。显然两个作业不能共享同一个状态机,但是却可以共享同一个状态机工厂,因为它们的跳转表是一样的。再说,我们无法事先估计作业的数量,而只能来一个作业就为其动态“生产”一个状态机。
状态机工厂(StateMachineFactory)
hadoop-yarn-common\src\main\java\org\apache\hadoop\yarn\state\StateMachineFactory.java
/**
 * Factory holding the transition rules shared by the state machines it creates.
 *
 * @param <OPERAND> type of the object handed to the transition hooks
 * @param <STATE> enum type listing the states
 * @param <EVENTTYPE> enum type listing the event types
 * @param <EVENT> type of the event object handed to the transition hooks
 */
@Public
@Evolving
public final class StateMachineFactory<OPERAND, STATE extends Enum<STATE>,
    EVENTTYPE extends Enum<EVENTTYPE>, EVENT> {

  // Head of the linked list of registered transitions.
  private final TransitionsListNode transitionsListNode;

  // Lookup table: current state -> event type -> transition to run for that pair.
  private Map<STATE, Map<EVENTTYPE, Transition<OPERAND, STATE, EVENTTYPE, EVENT>>>
      stateMachineTable;

  // Default initial state.
  private STATE defaultInitialState;

  // When true the copy constructor builds the lookup table immediately;
  // otherwise the table is left null there and InternalStateMachine's
  // constructor calls maybeMakeStateMachineTable().
  private final boolean optimized;

  // Node of the singly linked list of transitions.
  private class TransitionsListNode {
    // Transition carried by this node.
    final ApplicableTransition<OPERAND, STATE, EVENTTYPE, EVENT> transition;
    // Following node, or null at the end of the list.
    final TransitionsListNode next;

    TransitionsListNode(
        ApplicableTransition<OPERAND, STATE, EVENTTYPE, EVENT> transition,
        TransitionsListNode next) {
      this.transition = transition;
      this.next = next;
    }
  }
}
新建工厂实例
/**
 * Copy constructor: reuses the default initial state and the transition list
 * of {@code that}.
 *
 * @param that factory whose default initial state and transition list are shared
 * @param optimized when true the lookup table is built right away; when false
 *     the table is left {@code null}
 */
private StateMachineFactory(
    StateMachineFactory<OPERAND, STATE, EVENTTYPE, EVENT> that,
    boolean optimized) {
  this.defaultInitialState = that.defaultInitialState;
  this.transitionsListNode = that.transitionsListNode;
  this.optimized = optimized;
  if (optimized) {
    makeStateMachineTable();
  } else {
    stateMachineTable = null;
  }
}
构建状态跳转表
/**
 * Rebuilds {@code stateMachineTable} from the linked list of transitions.
 *
 * <p>The list is walked from its head and every transition is pushed on a
 * stack; popping the stack then applies the transitions in the reverse of
 * list order.
 */
private void makeStateMachineTable() {
  // Seed map with one entry (default initial state -> null), used as the
  // source for the EnumMap below.
  // NOTE(review): presumably needed because EnumMap(Map) cannot infer its key
  // type from an empty non-EnumMap source - confirm against the JDK docs.
  Map<STATE, Map<EVENTTYPE, Transition<OPERAND, STATE, EVENTTYPE, EVENT>>> seed =
      new HashMap<STATE, Map<EVENTTYPE, Transition<OPERAND, STATE, EVENTTYPE, EVENT>>>();
  seed.put(defaultInitialState, null);

  // EnumMap is used because it is expected to be faster and denser; most
  // states are expected to have at least one transition.
  stateMachineTable =
      new EnumMap<STATE, Map<EVENTTYPE, Transition<OPERAND, STATE, EVENTTYPE, EVENT>>>(seed);

  // Collect every transition of the list, head first.
  Stack<ApplicableTransition<OPERAND, STATE, EVENTTYPE, EVENT>> pending =
      new Stack<ApplicableTransition<OPERAND, STATE, EVENTTYPE, EVENT>>();
  TransitionsListNode node = transitionsListNode;
  while (node != null) {
    pending.push(node.transition);
    node = node.next;
  }

  // Pop each transition and let it write itself into the table.
  while (!pending.isEmpty()) {
    ApplicableTransition<OPERAND, STATE, EVENTTYPE, EVENT> rule = pending.pop();
    rule.apply(this);
  }
}
状态跳变驱动:将跳变规则写入状态表
/**
 * Internal contract for a transition rule that can install itself into a
 * factory.
 */
private interface ApplicableTransition<
    OPERAND,
    STATE extends Enum<STATE>,
    EVENTTYPE extends Enum<EVENTTYPE>,
    EVENT> {

  /**
   * Applies this rule to the given factory.
   *
   * @param subject factory the rule is applied to
   */
  void apply(StateMachineFactory<OPERAND, STATE, EVENTTYPE, EVENT> subject);
}
/**
 * {@link ApplicableTransition} implementation that registers one transition
 * under a (pre-state, event type) pair.
 */
private static class ApplicableSingleOrMultipleTransition<
    OPERAND,
    STATE extends Enum<STATE>,
    EVENTTYPE extends Enum<EVENTTYPE>,
    EVENT>
    implements ApplicableTransition<OPERAND, STATE, EVENTTYPE, EVENT> {

  // State the machine must be in for this rule to match.
  final STATE preState;
  // Event type that triggers this rule.
  final EVENTTYPE eventType;
  // Transition stored in the table for the pair above.
  final Transition<OPERAND, STATE, EVENTTYPE, EVENT> transition;

  ApplicableSingleOrMultipleTransition(
      STATE preState,
      EVENTTYPE eventType,
      Transition<OPERAND, STATE, EVENTTYPE, EVENT> transition) {
    this.preState = preState;
    this.eventType = eventType;
    this.transition = transition;
  }

  /**
   * Stores {@code transition} in the factory's table under
   * {@code preState -> eventType}, creating the per-state map when the table
   * has none (or a null one) for {@code preState}.
   *
   * @param subject factory whose {@code stateMachineTable} is updated
   */
  @Override
  public void apply(StateMachineFactory<OPERAND, STATE, EVENTTYPE, EVENT> subject) {
    Map<EVENTTYPE, Transition<OPERAND, STATE, EVENTTYPE, EVENT>> byEvent =
        subject.stateMachineTable.get(preState);
    if (byEvent == null) {
      // First rule seen for this pre-state: create its event map.
      byEvent = new HashMap<EVENTTYPE, Transition<OPERAND, STATE, EVENTTYPE, EVENT>>();
      subject.stateMachineTable.put(preState, byEvent);
    }
    byEvent.put(eventType, transition);
  }
}
添加跳转规则
/**
 * Adds the same {@code preState -> postState} transition for every event type
 * in {@code eventTypes} by calling the single-event {@code addTransition}
 * overload once per event type, each call on the factory returned by the
 * previous one.
 *
 * <p>An empty set adds nothing and returns this factory (rather than
 * {@code null}), so fluent call chains keep working.
 *
 * @param preState state the machine must be in for the transition to match
 * @param postState state passed to the single-event overload as the post state
 * @param eventTypes event types that trigger the transition
 * @param hook transition hook passed to the single-event overload
 * @return the factory returned by the last single-event call, or this factory
 *     when {@code eventTypes} is empty
 */
public StateMachineFactory<OPERAND, STATE, EVENTTYPE, EVENT> addTransition(
    STATE preState, STATE postState, Set<EVENTTYPE> eventTypes,
    SingleArcTransition<OPERAND, EVENT> hook) {
  // Start from this factory so that an empty set yields a usable result.
  StateMachineFactory<OPERAND, STATE, EVENTTYPE, EVENT> factory = this;
  for (EVENTTYPE event : eventTypes) {
    factory = factory.addTransition(preState, postState, event, hook);
  }
  return factory;
}
合成状态机
/**
 * Creates a state machine backed by this factory.
 *
 * @param operand object the new machine passes to transition hooks
 * @param initialState state the new machine starts in
 * @return a new state machine for {@code operand}
 */
public StateMachine<STATE, EVENTTYPE, EVENT> make(
    OPERAND operand, STATE initialState) {
  final StateMachine<STATE, EVENTTYPE, EVENT> machine =
      new InternalStateMachine(operand, initialState);
  return machine;
}
/**
 * {@link StateMachine} implementation tied to the enclosing factory: it keeps
 * the operand and the current state, and delegates every transition to the
 * factory.
 */
private class InternalStateMachine
    implements StateMachine<STATE, EVENTTYPE, EVENT> {

  // Object passed to the factory on every transition.
  private final OPERAND operand;
  // State this machine is currently in.
  private STATE currentState;

  InternalStateMachine(OPERAND operand, STATE initialState) {
    this.operand = operand;
    this.currentState = initialState;
    // For a non-optimized factory, ask it to (maybe) build its table now.
    if (!optimized) {
      maybeMakeStateMachineTable();
    }
  }

  /** Returns the state this machine is currently in. */
  @Override
  public synchronized STATE getCurrentState() {
    return currentState;
  }

  /**
   * Runs the factory's transition for the current state and the given event
   * type, then records the resulting state as current.
   *
   * @param eventType type of the incoming event
   * @param event the incoming event
   * @return the state after the transition
   * @throws InvalidStateTransitionException propagated from the factory's
   *     {@code doTransition}; the current state is left unchanged in that case
   */
  @Override
  public synchronized STATE doTransition(EVENTTYPE eventType, EVENT event)
      throws InvalidStateTransitionException {
    STATE nextState =
        StateMachineFactory.this.doTransition(operand, currentState, eventType, event);
    currentState = nextState;
    return nextState;
  }
}
驱动状态机运行
/**
 * Looks up the transition registered for {@code oldState} and
 * {@code eventType} and runs it.
 *
 * @param operand object handed to the transition
 * @param oldState state the machine is in before the transition
 * @param eventType type of the incoming event, used for the table lookup
 * @param event the incoming event
 * @return the state returned by the matching transition
 * @throws InvalidStateTransitionException when the table has no transition
 *     for the {@code oldState}/{@code eventType} pair
 */
private STATE doTransition(
    OPERAND operand, STATE oldState, EVENTTYPE eventType, EVENT event)
    throws InvalidStateTransitionException {
  Map<EVENTTYPE, Transition<OPERAND, STATE, EVENTTYPE, EVENT>> arcsForState =
      stateMachineTable.get(oldState);
  Transition<OPERAND, STATE, EVENTTYPE, EVENT> arc =
      (arcsForState == null) ? null : arcsForState.get(eventType);
  if (arc == null) {
    // Either the state has no rules or none of them matches this event type.
    throw new InvalidStateTransitionException(oldState, eventType);
  }
  // Run the arc, including its hook if it has one.
  return arc.doTransition(operand, oldState, event, eventType);
}
带钩子函数的跳变弧
/**
 * A transition arc: given the operand, the old state and the event, it yields
 * the state the machine moves to.
 */
private interface Transition<
    OPERAND,
    STATE extends Enum<STATE>,
    EVENTTYPE extends Enum<EVENTTYPE>,
    EVENT> {

  /**
   * Performs the transition.
   *
   * @param operand object the transition operates on
   * @param oldState state before the transition
   * @param event the incoming event
   * @param eventType type of the incoming event
   * @return the state after the transition
   */
  STATE doTransition(
      OPERAND operand, STATE oldState, EVENT event, EVENTTYPE eventType);
}
/**
 * Arc with one fixed post state: runs the optional hook and then always
 * returns that post state.
 */
private class SingleInternalArc
    implements Transition<OPERAND, STATE, EVENTTYPE, EVENT> {

  // State returned by every transition over this arc.
  private STATE postState;
  // Externally supplied transition hook; may be null.
  private SingleArcTransition<OPERAND, EVENT> hook;

  SingleInternalArc(STATE postState, SingleArcTransition<OPERAND, EVENT> hook) {
    this.postState = postState;
    this.hook = hook;
  }

  /**
   * Invokes the hook, when one is set, with the operand and the event.
   *
   * @return the fixed post state of this arc
   */
  @Override
  public STATE doTransition(
      OPERAND operand, STATE oldState, EVENT event, EVENTTYPE eventType) {
    if (hook == null) {
      return postState;
    }
    hook.transition(operand, event);
    return postState;
  }
}
/**
 * Arc with several possible post states: the hook picks the post state, which
 * must be one of the states declared valid for this arc.
 */
private class MultipleInternalArc
    implements Transition<OPERAND, STATE, EVENTTYPE, EVENT> {

  // States the hook is allowed to return.
  private Set<STATE> validPostStates;
  // Externally supplied transition hook that chooses the post state.
  private MultipleArcTransition<OPERAND, EVENT, STATE> hook;

  MultipleInternalArc(
      Set<STATE> postStates, MultipleArcTransition<OPERAND, EVENT, STATE> hook) {
    this.validPostStates = postStates;
    this.hook = hook;
  }

  /**
   * Asks the hook for the post state and validates it.
   *
   * @return the state returned by the hook
   * @throws InvalidStateTransitionException when the hook returns a state
   *     that is not in the valid post-state set
   */
  @Override
  public STATE doTransition(
      OPERAND operand, STATE oldState, EVENT event, EVENTTYPE eventType)
      throws InvalidStateTransitionException {
    STATE chosen = hook.transition(operand, event);
    if (validPostStates.contains(chosen)) {
      return chosen;
    }
    throw new InvalidStateTransitionException(oldState, eventType);
  }
}
}
构建一个状态机的步骤依次是:实例化状态机工厂,添加跳变弧和跳变规则,合成状态机,然后驱动运行。状态机工厂内部通过堆栈和链表来协助构建状态机。
状态机驱动器
hadoop-yarn-common\src\main\java\org\apache\hadoop\yarn\event\Event.java
事件定义为泛型接口,其类型参数是枚举类型,可扩展
/**
 * An event, identified by a type drawn from the enum {@code TYPE}.
 *
 * @param <TYPE> enum listing the possible event types
 */
@Public
@Evolving
public interface Event<TYPE extends Enum<TYPE>> {

  /** Returns the type of this event. */
  TYPE getType();

  /** Returns the timestamp of this event. */
  long getTimestamp();

  /** Returns a string representation of this event. */
  String toString();
}
hadoop-yarn-common\src\main\java\org\apache\hadoop\yarn\event\EventHandler.java
事件处理器,由具体的事件处理者实现
/**
 * Handler for events of type {@code T}; implemented by whatever component
 * processes those events.
 *
 * @param <T> event type this handler accepts
 */
@SuppressWarnings("rawtypes")
@Public
@Evolving
public interface EventHandler<T extends Event> {

  /**
   * Handles one event.
   *
   * @param event the event to handle
   */
  void handle(T event);
}
hadoop-yarn-common\src\main\java\org\apache\hadoop\yarn\event\Dispatcher.java
事件派发者,可扩展同步异步派发器,如AsyncDispatcher
/**
 * Event dispatcher: exposes an event handler and lets handlers be registered
 * per event-type enum class.
 */
@SuppressWarnings("rawtypes")
@Public
@Evolving
public interface Dispatcher {

  // NOTE(review): presumably a configuration key controlling whether the
  // dispatcher exits on error - its use is not shown here.
  // Interface fields are implicitly public static final.
  String DISPATCHER_EXIT_ON_ERROR_KEY = "yarn.dispatcher.exit-on-error";

  // Default value paired with the key above.
  boolean DEFAULT_DISPATCHER_EXIT_ON_ERROR = false;

  /** Returns the event handler held by this dispatcher. */
  EventHandler getEventHandler();

  /**
   * Registers a handler for an event type; handlers must be registered with
   * the dispatcher.
   *
   * @param eventType enum class of the event types the handler is registered for
   * @param handler handler to register
   */
  void register(Class<? extends Enum> eventType, EventHandler handler);
}
驱动器模型类似于观察者模式:事件处理器是观察者,Dispatcher负责事件转发,被观察者是状态机。当状态机发生跳变时,将驱动派发器派发来自状态机的事件;处理器作为观察者,接收并处理自己感兴趣的事件,处理完成之后再次驱动状态机跳变运行,周而复始,直到状态机到达终止状态。