0、总结
1.hadoop其实是一个脚本
2.该脚本启动JVM运行RunJar,再由RunJar通过反射调用HadoopWordCount.jar的main函数(脚本里面可能设置好了依赖库)
3.调用job.waitForCompletion(true)提交
4.YARNRunner设置MRAppMaster为ApplicationMaster
5.JobSubmitter调用JobResourceUploader将HadoopWordCount.jar上传到hdfs
6.JobSubmitter通过InputFormat算出需要的MapTask数量
7.JobSubmitter调用submitClient(YARNRunner).submitJob,最终通过ApplicationClientProtocol.submitApplication提交任务到ResourceManager
8.ResourceManager通过ClientRMService接收submitApplication请求
9.ResourceManager通过ApplicationMasterLauncher调用ContainerManagementProtocol.startContainers(…),请求NodeManager通过脚本启动MRAppMaster
10.NodeManager通过ContainerManagerImpl接收startContainers请求
11.NodeManager通过ResourceLocalizationService下载jar
12.NodeManager通过ContainerLaunch执行脚本,运行MRAppMaster
1、通过hadoop脚本执行HadoopWordCount.jar
hadoop jar HadoopWordCount.jar org.test.hadoop.WordCount /wordcount.txt /hdfsOutput
1.hadoop jar hadoop-0.20.2-examples.jar [class name]的实质是(参考:http://www.cppblog.com/mysileng/archive/2013/03/02/198176.html):
1.利用hadoop这个脚本启动一个jvm进程;
2.jvm进程去运行org.apache.hadoop.util.RunJar这个java类;
3.org.apache.hadoop.util.RunJar解压hadoop-0.20.2-examples.jar到hadoop.tmp.dir/hadoop-unjar*/目录下;
4.org.apache.hadoop.util.RunJar动态的加载并运行Main-Class或指定的Class;
5.Main-Class或指定的Class中设定Job的各项属性
6.提交job到JobTracker上并监视运行情况(JobTracker是Hadoop 1.x的说法,YARN下对应ResourceManager)。
注意:以上都是在jobClient上执行的。
2.通过RunJar执行HadoopWordCount.jar的main函数
public class RunJar {
public static void main(String[] args) throws Throwable {
new RunJar().run(args); --参数:HadoopWordCount.jar org.test.hadoop.WordCount /wordcount.txt /hdfsOutput(子命令jar已被hadoop脚本消费,不会传给RunJar)
->jarFile = new JarFile(fileName);
->Method main = mainClass.getMethod("main", String[].class);
->main.invoke(null, new Object[] {newArgs}); --org.test.hadoop.WordCount的main函数
}
}
3.HadoopWordCount.jar的main函数
public class WordCount {
public static void main(String[] args) throws Exception {
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
FileOutputFormat.setOutputPath(job,new Path(otherArgs[otherArgs.length - 1]));
job.waitForCompletion(true);
}
}
2、执行job.waitForCompletion(true)
public class Job extends JobContextImpl implements JobContext, AutoCloseable {
public boolean waitForCompletion(boolean verbose ) throws IOException, InterruptedException, ClassNotFoundException {
submit();//提交过程在submit()方法中
->connect();
->final JobSubmitter submitter = getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
->submitter.submitJobInternal(Job.this, cluster);
}
}
1、上传jar到hdfs
2、根据InputFormat计算出的输入分片(InputSplit)数量,得到需要的MapTask数量并写入配置
3、调用ApplicationClientProtocol.submitApplication提交任务
4、设置ApplicationMaster为MRAppMaster(实际发生在第3步submitApplication之前,由YARNRunner构造ApplicationSubmissionContext时完成)
class JobSubmitter {
private ClientProtocol submitClient;
private FileSystem jtFs;
JobStatus submitJobInternal(Job job, Cluster cluster) throws ClassNotFoundException, InterruptedException, IOException {
Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);
Path submitJobDir = new Path(jobStagingArea, jobId.toString());
copyAndConfigureFiles(job, submitJobDir);
->JobResourceUploader rUploader = new JobResourceUploader(jtFs, useWildcards);
->rUploader.uploadResources(job, jobSubmitDir);
->uploadResourcesInternal(job, submitJobDir); --【JobResourceUploader】
->String jobJar = job.getJar(); --【JobResourceUploader】
->uploadJobJar(job, jobJar, submitJobDir, replication, statCache); --【JobResourceUploader】
->copyJar(jobJarPath, newJarPath, submitReplication); --【JobResourceUploader】
//通过jtFs(FileSystem)把本地jar拷贝到HDFS上的作业提交目录
->jtFs.copyFromLocalFile(originalJarPath, submitJarFile); --【JobResourceUploader】
//设置需要MapTask的数量
int maps = writeSplits(job, submitJobDir);
->maps = writeNewSplits(job, jobSubmitDir);
->InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
->List<InputSplit> splits = input.getSplits(job);
->return array.length;
conf.setInt(MRJobConfig.NUM_MAPS, maps);
status = submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials()); --submitClient是YARNRunner
->ApplicationSubmissionContext appContext = createApplicationSubmissionContext(conf, jobSubmitDir, ts);
->List<String> vargs = setupAMCommand(jobConf);
//设置ApplicationMaster为MRAppMaster
//public static final String APPLICATION_MASTER_CLASS = "org.apache.hadoop.mapreduce.v2.app.MRAppMaster";
->vargs.add(MRJobConfig.APPLICATION_MASTER_CLASS);
->ApplicationId applicationId = resMgrDelegate.submitApplication(appContext); --【YARNRunner】
->return client.submitApplication(appContext); --【ResourceMgrDelegate】
->SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class); --【YarnClientImpl】
//rmClient是RPC客户端:ApplicationClientProtocol
->rmClient.submitApplication(request); --【YarnClientImpl】
}
}
3、通过ApplicationClientProtocol提交到ResourceManager
1.协议
public interface ApplicationClientProtocol extends ApplicationBaseProtocol {
public SubmitApplicationResponse submitApplication(SubmitApplicationRequest request) throws YarnException, IOException;
}
2.ResourceManager:ClientRMService接收来自客户端的submitApplication调用
public class ClientRMService extends AbstractService implements ApplicationClientProtocol {
private Server server;
protected void serviceStart() throws Exception {
this.server = rpc.getServer(ApplicationClientProtocol.class, this,clientBindAddress,conf, this.rmDTSecretManager,conf.getInt(YarnConfiguration.RM_CLIENT_THREAD_COUNT, YarnConfiguration.DEFAULT_RM_CLIENT_THREAD_COUNT));
}
//接收客户端的submitApplication请求
public SubmitApplicationResponse submitApplication(SubmitApplicationRequest request) throws YarnException, IOException {
ApplicationSubmissionContext submissionContext = request.getApplicationSubmissionContext();
ApplicationId applicationId = submissionContext.getApplicationId();
rmAppManager.submitApplication(submissionContext, System.currentTimeMillis(), user);
->RMAppImpl application = createAndPopulateNewRMApp(submissionContext, submitTime, user, false, -1); --【RMAppManager】
->RMAppImpl application = new RMAppImpl(applicationId, rmContext, this.conf,submissionContext.getApplicationName(), user,submissionContext.getQueue(),submissionContext, this.scheduler, this.masterService,submitTime, submissionContext.getApplicationType(),submissionContext.getApplicationTags(), amReqs, placementContext,startTime);
->this.rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.START));
}
}
3.最终调用ContainerManagementProtocol.startContainers(…)
public class RMAppImpl implements RMApp, Recoverable {
private static final StateMachineFactory<RMAppImpl,RMAppState,RMAppEventType,RMAppEvent> stateMachineFactory = new StateMachineFactory<RMAppImpl,RMAppState,RMAppEventType,RMAppEvent>(RMAppState.NEW)
.addTransition(RMAppState.NEW, RMAppState.NEW_SAVING, RMAppEventType.START, new RMAppNewlySavingTransition())
public RMAppImpl(ApplicationId applicationId, RMContext rmContext...) {
this.applicationId = applicationId;
this.name = StringInterner.weakIntern(name);
this.rmContext = rmContext;
this.dispatcher = rmContext.getDispatcher();
this.scheduler = scheduler;
this.masterService = masterService;
applicationSchedulingEnvs.putAll(submissionContext.getAMContainerSpec().getEnvironment());
applicationSchedulingEnvs.putAll(submissionContext.getApplicationSchedulingPropertiesMap());
}
private static final class RMAppNewlySavingTransition extends RMAppTransition {
public void transition(RMAppImpl app, RMAppEvent event) {
app.rmContext.getStateStore().storeNewApplication(app);
->getRMStateStoreEventHandler().handle(new RMStateStoreAppEvent(appState));
->public RMStateStoreAppEvent(ApplicationStateData appState)
->super(RMStateStoreEventType.STORE_APP);
->getRMStateStoreEventHandler().handle(...
->store.notifyApplication(new RMAppEvent(appId, RMAppEventType.APP_NEW_SAVED)); --【RMStateStore.StoreAppTransition】
->app.handler.handle(new AppAddedSchedulerEvent(app.user, app.submissionContext, false, app.applicationPriority, app.placementContext)); --【RMAppImpl.AddApplicationToSchedulerTransition】
->addApplication(appAddedEvent.getApplicationId(), appAddedEvent.getQueue(), appAddedEvent.getUser(), appAddedEvent.getIsAppRecovering()); --【FifoScheduler】
->rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); --【FifoScheduler】
->app.createAndStartNewAttempt(false); --【RMAppImpl.StartAppAttemptTransition】
->handler.handle(new RMAppStartAttemptEvent(currentAttempt.getAppAttemptId(), transferStateFromPreviousAttempt));
->super(appAttemptId, RMAppAttemptEventType.START);
->handler.handle(...
->appAttempt.eventHandler.handle(new AppAttemptAddedSchedulerEvent(appAttempt.applicationAttemptId, transferStateFromPreviousAttempt)); --【RMAppAttemptImpl.AttemptStartedTransition】
->super(SchedulerEventType.APP_ATTEMPT_ADDED);
->addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), appAttemptAddedEvent.getTransferStateFromPreviousAttempt(), appAttemptAddedEvent.getIsAttemptRecovering()); --【FifoScheduler】
->rmContext.getDispatcher().getEventHandler().handle(new RMAppAttemptEvent(appAttemptId,RMAppAttemptEventType.ATTEMPT_ADDED));
->rmContext.getStateStore().storeNewApplicationAttempt(this); --【RMAppAttemptImpl】
->getRMStateStoreEventHandler().handle(new RMStateStoreAppAttemptEvent(attemptState));
->super(RMStateStoreEventType.STORE_APP_ATTEMPT);
->store.notifyApplicationAttempt(new RMAppAttemptEvent(attemptState.getAttemptId(),RMAppAttemptEventType.ATTEMPT_NEW_SAVED)); --【RMStateStore.StoreAppAttemptTransition】
->appAttempt.launchAttempt();
->eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this)); --【RMAppAttemptImpl】
->launch(application); --【ApplicationMasterLauncher】
->Runnable launcher = createRunnableLauncher(application, AMLauncherEventType.LAUNCH); --【ApplicationMasterLauncher】
->public void run()
->StartContainersResponse response = containerMgrProxy.startContainers(allRequests);
}
}
}
4、NodeManager:接收ContainerManagementProtocol.startContainers(…)请求
public class ContainerManagerImpl extends CompositeService implements ContainerManager {
private Server server;
protected void serviceStart() throws Exception {
server = rpc.getServer(ContainerManagementProtocol.class, this, initialAddress, serverConf, this.context.getNMTokenSecretManager(),conf.getInt(YarnConfiguration.NM_CONTAINER_MGR_THREAD_COUNT, YarnConfiguration.DEFAULT_NM_CONTAINER_MGR_THREAD_COUNT));
}
public StartContainersResponse startContainers(StartContainersRequest requests) throws YarnException, IOException {
startContainerInternal(containerTokenIdentifier, request, remoteUser);
->dispatcher.getEventHandler().handle(new ApplicationContainerInitEvent(container));
->super(container.getContainerId().getApplicationAttemptId().getApplicationId(), ApplicationEventType.INIT_CONTAINER);
->app.dispatcher.getEventHandler().handle(new ContainerInitEvent(container.getContainerId())); --【ApplicationImpl.InitContainerTransition】
->super(c, ContainerEventType.INIT_CONTAINER);
->container.dispatcher.getEventHandler().handle(new ContainerLocalizationRequestEvent(container, req)); --【ContainerImpl】
->super(LocalizationEventType.LOCALIZE_CONTAINER_RESOURCES, c);
->handleInitContainerResources((ContainerLocalizationRequestEvent) event); --【ResourceLocalizationService】
}
}
1.下载jar(资源本地化)
ResourceLocalizationService.handleInitContainerResources(...)
->tracker.handle(new ResourceRequestEvent(req, e.getKey(), ctxt));
->super(resource, ResourceEventType.REQUEST);
->rsrc.dispatcher.getEventHandler().handle(new LocalizerResourceRequestEvent(rsrc, req.getVisibility(), ctxt, req.getLocalResourceRequest().getPattern())); --【LocalizedResource】
->super(LocalizerEventType.REQUEST_RESOURCE_LOCALIZATION, context.getContainerId().toString());
->publicLocalizer.addResource(req); --【ResourceLocalizationService】
->pending.put(queue.submit(new FSDownload(lfs, null, conf,publicDirDestPath, resource, request.getContext().getStatCache())),request); --【ResourceLocalizationService.PublicLocalizer】
->public Path call() throws Exception
->downloadAndUnpack(sCopy, destination); --【FSDownload】
->FileUtil.copy(sourceFileSystem, source, destinationFileSystem, destination, false,true, conf);
->public void run() { --【ResourceLocalizationService.PublicLocalizer】
->Future<Path> completed = queue.take(); --【ResourceLocalizationService.PublicLocalizer】
->Path local = completed.get();//取得FSDownload.call()执行完后返回的本地路径 --【ResourceLocalizationService.PublicLocalizer】
->publicRsrc.handle(new ResourceLocalizedEvent(key, local, FileUtil.getDU(new File(local.toUri()))));
->super(rsrc, ResourceEventType.LOCALIZED);
->rsrc.dispatcher.getEventHandler().handle(new ContainerResourceLocalizedEvent(container, rsrc.rsrc, rsrc.localPath)); --【LocalizedResource.FetchSuccessTransition】
->super(container, ContainerEventType.RESOURCE_LOCALIZED, rsrc);
->container.sendScheduleEvent(); --【ContainerImpl.LocalizedTransition】
2.启动container
container.sendScheduleEvent(); --【ContainerImpl】
->ContainersLauncherEventType launcherEvent = ContainersLauncherEventType.LAUNCH_CONTAINER; --【ContainerImpl】
->dispatcher.getEventHandler().handle(new ContainersLauncherEvent(this, launcherEvent)); --【ContainerImpl】
->ContainerLaunch launch = new ContainerLaunch(context, getConfig(), dispatcher, exec, app, event.getContainer(), dirsHandler, containerManager);
->containerLauncher.submit(launch);
->public Integer call() --【ContainerLaunch】
3.启动container:执行启动脚本
public class ContainerLaunch implements Callable<Integer> {
public Integer call() {
final List<String> command = launchContext.getCommands();
launchContext.setCommands(newCmds);
exec.writeLaunchEnv(containerScriptOutStream, environment, localResources, launchContext.getCommands(),containerLogDir, user, nmEnvVars);
ret = launchContainer(new ContainerStartContext.Builder()
.setContainer(container)
.setLocalizedResources(localResources)
.setNmPrivateContainerScriptPath(nmPrivateContainerScriptPath)
.setNmPrivateTokensPath(nmPrivateTokensPath)
.setNmPrivateKeystorePath(nmPrivateKeystorePath)
.setNmPrivateTruststorePath(nmPrivateTruststorePath)
.setUser(user)
.setAppId(appIdStr)
.setContainerWorkDir(containerWorkDir)
.setContainerCsiVolumesRootDir(csiVolumesRoot)
.setLocalDirs(localDirs)
.setLogDirs(logDirs)
.setFilecacheDirs(filecacheDirs)
.setUserLocalDirs(userLocalDirs)
.setContainerLocalDirs(containerLocalDirs)
.setContainerLogDirs(containerLogDirs)
.setUserFilecacheDirs(userFilecacheDirs)
.setApplicationLocalDirs(applicationLocalDirs).build());
}
protected int launchContainer(ContainerStartContext ctx) throws IOException, ConfigurationException {
//启动MRAppMaster
return exec.launchContainer(ctx); --exec默认DefaultContainerExecutor
}
}