Hadoop中YARNRunner里面submit Job以及AM生成至Job处理过程源码解析(下)

中间隔了国庆, 好不容易才看明白了MRAppMaster如何启动其他container以及如何在NodeManager上面运行Task的。

上回写到AM启动到最后其实运行的是MRAppMaster的main方法, 那么我们就从这里开始看它是如何启动其他container的, 首先看一下main方法:

/**
 * Entry point of MRAppMaster.
 *
 * Reads the container id, NodeManager host/port/HTTP port and the application
 * submit time from environment variables, validates each of them, builds the
 * MRAppMaster, registers a shutdown hook for it, loads the job configuration
 * and finally hands over to initAndStartAppMaster. Any Throwable raised along
 * the way is logged as fatal and the process is terminated with status 1.
 *
 * @param args command-line arguments (not read by this method)
 */
public static void main(String[] args) {
try {
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());

// There appears to be a lot of code here, but most of it can be skimmed:
// it reads settings from the local environment and rebuilds the
// corresponding objects (container id, host, port and so on).
String containerIdStr =
System.getenv(Environment.CONTAINER_ID.name());
String nodeHostString = System.getenv(Environment.NM_HOST.name());
String nodePortString = System.getenv(Environment.NM_PORT.name());
String nodeHttpPortString =
System.getenv(Environment.NM_HTTP_PORT.name());
String appSubmitTimeStr =
System.getenv(ApplicationConstants.APP_SUBMIT_TIME_ENV);

// Each value read above is validated together with the name of the
// environment variable it came from.
validateInputParam(containerIdStr,
Environment.CONTAINER_ID.name());
validateInputParam(nodeHostString, Environment.NM_HOST.name());
validateInputParam(nodePortString, Environment.NM_PORT.name());
validateInputParam(nodeHttpPortString,
Environment.NM_HTTP_PORT.name());
validateInputParam(appSubmitTimeStr,
ApplicationConstants.APP_SUBMIT_TIME_ENV);

ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
ApplicationAttemptId applicationAttemptId =
containerId.getApplicationAttemptId();
long appSubmitTime = Long.parseLong(appSubmitTimeStr);


// Build the appMaster from the values obtained above.
MRAppMaster appMaster =
new MRAppMaster(applicationAttemptId, containerId, nodeHostString,
Integer.parseInt(nodePortString),
Integer.parseInt(nodeHttpPortString), appSubmitTime);
ShutdownHookManager.get().addShutdownHook(
new MRAppMasterShutdownHook(appMaster), SHUTDOWN_HOOK_PRIORITY);
JobConf conf = new JobConf(new YarnConfiguration());
conf.addResource(new Path(MRJobConfig.JOB_CONF_FILE));

MRWebAppUtil.initialize(conf);
String jobUserName = System
.getenv(ApplicationConstants.Environment.USER.name());
conf.set(MRJobConfig.USER_NAME, jobUserName);
// Do not automatically close FileSystem objects so that in case of
// SIGTERM I have a chance to write out the job history. I'll be closing
// the objects myself.
conf.setBoolean("fs.automatic.close", false);

// This is the most important step: it ends up invoking the app master's
// serviceInit and serviceStart.
initAndStartAppMaster(appMaster, conf, jobUserName);
} catch (Throwable t) {
LOG.fatal("Error starting MRAppMaster", t);
ExitUtil.terminate(1, t);
}
}



看一下initAndStartAppMaster:

/**
 * Initializes and starts the given MRAppMaster inside appMasterUgi.doAs.
 *
 * NOTE: this is an excerpt; the code that precedes the doAs call (including
 * how appMasterUgi is obtained) is elided and marked with "...".
 *
 * @param appMaster   the application master to initialize and start
 * @param conf        the job configuration passed to appMaster.init
 * @param jobUserName the job user name; presumably consumed by the elided
 *                    portion, it is not referenced in the visible code
 * @throws IOException          declared by the method; inside the action an
 *                              IOException is raised when
 *                              appMaster.errorHappenedShutDown is set after start
 * @throws InterruptedException declared by the method
 */
protected static void initAndStartAppMaster(final MRAppMaster appMaster,
final JobConf conf, String jobUserName) throws IOException,
InterruptedException {
...

// This is where MRAppMaster's init (serviceInit) and start (serviceStart)
// get invoked.
appMasterUgi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
appMaster.init(conf);
appMaster.start();
// Surface a shutdown request that was flagged during init/start.
if(appMaster.errorHappenedShutDown) {
throw new IOException("Was asked to shut down.");
}
return null;
}
});
}



serviceInit主要是初始化一堆对象, 这里就直接看一下serviceStart了:

/**
 * Starts the application master: creates the job, starts the services via
 * super.serviceStart(), sets the job class loader, and then either dispatches
 * a JOB_INIT_FAILED event (when initFailed is set) or calls startJobs().
 *
 * NOTE: this is an excerpt; elided code is marked with "...".
 *
 * @throws Exception declared by the method
 */
protected void serviceStart() throws Exception {
  

...

// Create the job.
job = createJob(getConfig(), forcedState, shutDownMessage);

...

// Starts all services; the important ones include containerAllocator and
// containerLauncher.
// containerAllocator has its init method invoked and then its start method.
// Per the article's reading of the source, RMContainerAllocator is an
// implementation of the abstract class AbstractService, so its init and
// start end up in serviceInit and serviceStart, and its serviceStart starts
// eventHandlingThread and allocatorThread.
// eventHandlingThread is responsible for event handling; allocatorThread
// heartbeats to the RM to report status and perform container operations.
super.serviceStart();


// finally set the job classloader
MRApps.setClassLoader(jobClassLoader, getConfig());

if (initFailed) {
JobEvent initFailedEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT_FAILED);
jobEventDispatcher.handle(initFailedEvent);
} else {
// Everything has been started; now start the job.
startJobs();
}


}


那么既然所有的service都已经启动完成, 接下来就看看startJobs里面做了什么:

/**
 * Gets the job rolling by sending a job-start event through the dispatcher.
 * Sending this event is what triggers job execution.
 */
protected void startJobs() {
  // Build the job-start event from the job id and the recovered start time.
  final JobEvent jobStart =
      new JobStartEvent(job.getID(), recoveredJobStartTime);

  // Hand the event to the dispatcher's handler; it is delivered to JobImpl
  // as a JobEventType.JOB_START event.
  dispatcher.getEventHandler().handle(jobStart);
}


那么我们就要去看看JobImpl的状态机的定义了:

// State machine definition for JobImpl, with JobStateInternal.NEW as the
// initial state. This is an excerpt: elided transitions are marked with "..."
// and the end of the declaration is not shown.
protected static final
StateMachineFactory<JobImpl, JobStateInternal, JobEventType, JobEvent>
stateMachineFactory
= new StateMachineFactory<JobImpl, JobStateInternal, JobEventType, JobEvent>
(JobStateInternal.NEW)

// Transitions from NEW state
.addTransition(JobStateInternal.NEW, JobStateInternal.NEW,
JobEventType.JOB_DIAGNOSTIC_UPDATE,
DIAGNOSTIC_UPDATE_TRANSITION)

...


// On JOB_START the job moves from INITED to SETUP, and StartTransition is
// what gets executed.
.addTransition(JobStateInternal.INITED, JobStateInternal.SETUP,
JobEventType.JOB_START,
new StartTransition())


那么接下来看一下StartTransition在做什么 (目前所有的事情都是在AM这个container里面做的, 还没有涉及到执行RM相关的操作)

/**
 * Transition run for the job-start event: records the job start time, emits
 * the job-history events, bumps the running-job metric and asks the committer
 * to set the job up.
 */
public static class StartTransition
    implements SingleArcTransition<JobImpl, JobEvent> {

  /**
   * This transition executes in the event-dispatcher thread, though it's
   * triggered in MRAppMaster's startJobs() method.
   *
   * @param job   the job being started
   * @param event the triggering event; it is cast to JobStartEvent
   */
  @Override
  public void transition(JobImpl job, JobEvent event) {
    final JobStartEvent startEvent = (JobStartEvent) event;

    // A non-zero recovered start time takes precedence over the clock.
    job.startTime = startEvent.getRecoveredJobStartTime() != 0
        ? startEvent.getRecoveredJobStartTime()
        : job.clock.getTime();

    // Emit the job-history events: first "inited", then "info changed".
    final JobInitedEvent initedEvent = new JobInitedEvent(
        job.oldJobId,
        job.startTime,
        job.numMapTasks,
        job.numReduceTasks,
        job.getState().toString(),
        job.isUber());
    job.eventHandler.handle(new JobHistoryEvent(job.jobId, initedEvent));

    final JobInfoChangeEvent infoChangeEvent = new JobInfoChangeEvent(
        job.oldJobId, job.appSubmitTime, job.startTime);
    job.eventHandler.handle(new JobHistoryEvent(job.jobId, infoChangeEvent));

    // Count this job as running.
    job.metrics.runningJob(job);

    // Trigger the JOB_SETUP event handled by CommitterEventHandler.
    job.eventHandler.handle(
        new CommitterJobSetupEvent(job.jobId, job.jobContext));
  }
}


那么看一下CommitterEventHandler里面JOB_SETUP都做了什么:

/**
 * Logs the event being processed and dispatches it to the handler matching
 * its type. An event type with no matching handler results in a
 * YarnRuntimeException.
 */
public void run() {
  LOG.info("Processing the event " + event.toString());

  switch (event.getType()) {
    case JOB_SETUP:
      // JOB_SETUP is served by handleJobSetup.
      handleJobSetup((CommitterJobSetupEvent) event);
      return;
    case JOB_COMMIT:
      handleJobCommit((CommitterJobCommitEvent) event);
      return;
    case JOB_ABORT:
      handleJobAbort((CommitterJobAbortEvent) event);
      return;
    case TASK_ABORT:
      handleTaskAbort((CommitterTaskAbortEvent) event);
      return;
    default:
      break;
  }

  // Only reached when none of the known event types matched.
  throw new YarnRuntimeException("Unexpected committer event "
      + event.toString());
}

//handleJobSetup
protected void handleJobSetup(CommitterJobSetupEvent event) {
try {

//回去OutputCommitter执行setupJob, setupJob是一个抽象类, 会根
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值