/*
*当前类:org.apache.flink.client.deployment.executors.AbstractJobClusterExecutor
*StreamingExecutionEnvironment.execute(jobName)->AbstractJobClusterExecutor.execute(streamGraph, configuration, userClassloader)->YarnClusterDescriptor.deployJobCluster(clusterSpecification, jobGraph, configAccessor.getDetachedMode());
*///用之前创建的集群描述器调用deployJobCluster()方法,返回clusterClientProvider@OverridepublicClusterClientProvider<ApplicationId>deployJobCluster(ClusterSpecification clusterSpecification,JobGraph jobGraph,boolean detached)throwsClusterDeploymentException{try{returndeployInternal(
clusterSpecification,"Flink per-job cluster",getYarnJobClusterEntrypoint(),//这一步是获取yarnJobCluster入口类的类名,并不启动这个任务集群入口类,只是把类名传进去以后会用
jobGraph,
detached);//configAccessor.getDetachedMode()}catch(Exception e){thrownewClusterDeploymentException("Could not deploy Yarn job cluster.", e);}}//还在本类,deployInternal方法的具体逻辑:deployJobCluster(clusterSpecification, jobGraph, configAccessor.getDetachedMode())调用deployInternal(clusterSpecification,"Flink per-job cluster",getYarnJobClusterEntrypoint(),jobGraph,detached);/**
* This method will block until the ApplicationMaster/JobManager have been deployed on YARN.
*
* @param clusterSpecification Initial cluster specification for the Flink cluster to be deployed
* @param applicationName name of the Yarn application to start
* @param yarnClusterEntrypoint Class name of the Yarn cluster entry point.
* @param jobGraph A job graph which is deployed with the Flink cluster, {@code null} if none
* @param detached True if the cluster should be started in detached mode
*/privateClusterClientProvider<ApplicationId>deployInternal(ClusterSpecification clusterSpecification,String applicationName,String yarnClusterEntrypoint,@NullableJobGraph jobGraph,boolean detached)throwsException{finalUserGroupInformation currentUser =UserGroupInformation.getCurrentUser();if(HadoopUtils.isKerberosSecurityEnabled(currentUser)){boolean useTicketCache = flinkConfiguration.getBoolean(SecurityOptions.KERBEROS_LOGIN_USETICKETCACHE);if(!HadoopUtils.areKerberosCredentialsValid(currentUser, useTicketCache)){thrownewRuntimeException("Hadoop security with Kerberos is enabled but the login user "+"does not have Kerberos credentials or delegation tokens!");}}/*TODO 部署前检查:jar包路径、conf路径、yarn最大核数....*/isReadyForDeployment(clusterSpecification);// ------------------ Check if the specified queue exists --------------------/*TODO 检查指定的yarn队列是否存在*/checkYarnQueues(yarnClient);//之前创建集群描述器传进来的yarnClient,此时已经包含配置属性,见yarnClient.init(yarnConfiguration);// ------------------ Check if the YARN ClusterClient has the requested resources --------------/*TODO 检查yarn是否有足够的资源*/// Create application via yarnClient,跟yarn集群交互,创建applicationfinalYarnClientApplication yarnApplication = yarnClient.createApplication();finalGetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();Resource maxRes = appResponse.getMaximumResourceCapability();finalClusterResourceDescription freeClusterMem;try{
freeClusterMem =getCurrentFreeClusterResources(yarnClient);}catch(YarnException|IOException e){failSessionDuringDeployment(yarnClient, yarnApplication);thrownewYarnDeploymentException("Could not retrieve information about free cluster resources.", e);}finalint yarnMinAllocationMB = yarnConfiguration.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);if(yarnMinAllocationMB <=0){thrownewYarnDeploymentException("The minimum allocation memory "+"("+ yarnMinAllocationMB +" MB) configured via '"+YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB
+"' should be greater than 0.");}finalClusterSpecification validClusterSpecification;//校验集群的资源并获取有效的配置try{
validClusterSpecification =validateClusterResources(
clusterSpecification,
yarnMinAllocationMB,
maxRes,
freeClusterMem);}catch(YarnDeploymentException yde){failSessionDuringDeployment(yarnClient, yarnApplication);throw yde;}
LOG.info("Cluster specification: {}", validClusterSpecification);finalClusterEntrypoint.ExecutionMode executionMode = detached ?ClusterEntrypoint.ExecutionMode.DETACHED
:ClusterEntrypoint.ExecutionMode.NORMAL;
flinkConfiguration.setString(ClusterEntrypoint.EXECUTION_MODE, executionMode.toString());/*TODO 开始启动AM*/ApplicationReport report =startAppMaster(
flinkConfiguration,
applicationName,
yarnClusterEntrypoint,
jobGraph,
yarnClient,
yarnApplication,
validClusterSpecification);// print the application id for user to cancel themselves.if(detached){finalApplicationId yarnApplicationId = report.getApplicationId();logDetachedClusterInformation(yarnApplicationId, LOG);}setClusterEntrypointInfoToConfig(report);return()->{try{returnnewRestClusterClient<>(flinkConfiguration, report.getApplicationId());}catch(Exception e){thrownewRuntimeException("Error while creating RestClusterClient.", e);}};}
privateApplicationReportstartAppMaster(Configuration configuration,String applicationName,String yarnClusterEntrypoint,JobGraph jobGraph,YarnClient yarnClient,YarnClientApplication yarnApplication,ClusterSpecification clusterSpecification)throwsException{// ------------------ Initialize the file systems -------------------------/*TODO 初始化、创建 Hadoop的 FileSystem*/org.apache.flink.core.fs.FileSystem.initialize(
configuration,PluginUtils.createPluginManagerFromRootFolder(configuration));// 初始化文件系统(HDFS)finalFileSystem fs =FileSystem.get(yarnConfiguration);// hard coded check for the GoogleHDFS client because its not overriding the getScheme() method.if(!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem")&&
fs.getScheme().startsWith("file")){
LOG.warn("The file system scheme is '"+ fs.getScheme()+"'. This indicates that the "+"specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values."+"The Flink YARN client needs to store its files in a distributed file system");}ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();finalList<Path> providedLibDirs =Utils.getQualifiedRemoteSharedPaths(configuration, yarnConfiguration);/*TODO Yarn应用的文件上传器:FS、对应的HDFS路径
* 用来上传:用户jar包、flink的依赖、flink的配置文件(接下来接近300行,不用看)
* 直接跳到 fileUploader.close()
* */// 上传文件的工具类finalYarnApplicationFileUploader fileUploader =YarnApplicationFileUploader.from(
fs,getStagingDir(fs),
providedLibDirs,
appContext.getApplicationId(),getFileReplication());// The files need to be shipped and added to classpath.Set<File> systemShipFiles =newHashSet<>(shipFiles.size());//赋值见178行,调用本类addShipFilesfor(File file : shipFiles){
systemShipFiles.add(file.getAbsoluteFile());//把shipFiles遍历添加进systemShipFiles}//拿日志配置finalString logConfigFilePath = configuration.getString(YarnConfigOptionsInternal.APPLICATION_LOG_CONFIG_FILE);if(logConfigFilePath !=null){
systemShipFiles.add(newFile(logConfigFilePath));//再把配置文件里拿到的日志文件路径添加到systemShipFiles,后面这个systemShipFiles要通过fileUploader传到hdfs}// Set-up ApplicationSubmissionContext for the applicationfinalApplicationId appId = appContext.getApplicationId();// ------------------ Add Zookeeper namespace to local flinkConfiguraton ------String zkNamespace =getZookeeperNamespace();// no user specified cli argument for namespace?if(zkNamespace ==null|| zkNamespace.isEmpty()){// namespace defined in config? else use applicationId as default.
zkNamespace = configuration.getString(HighAvailabilityOptions.HA_CLUSTER_ID,String.valueOf(appId));setZookeeperNamespace(zkNamespace);}
configuration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, zkNamespace);/*TODO 高可用配置:重试次数,默认2次*/if(HighAvailabilityMode.isHighAvailabilityModeActivated(configuration)){// activate re-execution of failed applications
appContext.setMaxAppAttempts(
configuration.getInteger(YarnConfigOptions.APPLICATION_ATTEMPTS.key(),YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));activateHighAvailabilitySupport(appContext);}else{// set number of application retries to 1 in the default case
appContext.setMaxAppAttempts(
configuration.getInteger(YarnConfigOptions.APPLICATION_ATTEMPTS.key(),1));}/*TODO 添加用户jar包*/finalSet<Path> userJarFiles =newHashSet<>();if(jobGraph !=null){
userJarFiles.addAll(jobGraph.getUserJars().stream().map(f -> f.toUri()).map(Path::new).collect(Collectors.toSet()));}finalList<URI> jarUrls =ConfigUtils.decodeListFromConfig(configuration,PipelineOptions.JARS, URI::create);if(jarUrls !=null&&YarnApplicationClusterEntryPoint.class.getName().equals(yarnClusterEntrypoint)){
userJarFiles.addAll(jarUrls.stream().map(Path::new).collect(Collectors.toSet()));}// only for per job modeif(jobGraph !=null){for(Map.Entry<String,DistributedCache.DistributedCacheEntry> entry : jobGraph.getUserArtifacts().entrySet()){// only upload local filesif(!Utils.isRemotePath(entry.getValue().filePath)){Path localPath =newPath(entry.getValue().filePath);Tuple2<Path,Long> remoteFileInfo =
fileUploader.uploadLocalFileToRemote(localPath, entry.getKey());
jobGraph.setUserArtifactRemotePath(entry.getKey(), remoteFileInfo.f0.toString());}}
jobGraph.writeUserArtifactEntriesToConfiguration();}if(providedLibDirs ==null|| providedLibDirs.isEmpty()){addLibFoldersToShipFiles(systemShipFiles);}// Register all files in provided lib dirs as local resources with public visibility// and upload the remaining dependencies as local resources with APPLICATION visibility.finalList<String> systemClassPaths = fileUploader.registerProvidedLocalResources();finalList<String> uploadedDependencies = fileUploader.registerMultipleLocalResources(
systemShipFiles.stream().map(e ->newPath(e.toURI())).collect(Collectors.toSet()),Path.CUR_DIR,LocalResourceType.FILE);
systemClassPaths.addAll(uploadedDependencies);// upload and register ship-only files// Plugin files only need to be shipped and should not be added to classpath.if(providedLibDirs ==null|| providedLibDirs.isEmpty()){Set<File> shipOnlyFiles =newHashSet<>();addPluginsFoldersToShipFiles(shipOnlyFiles);
fileUploader.registerMultipleLocalResources(
shipOnlyFiles.stream().map(e ->newPath(e.toURI())).collect(Collectors.toSet()),Path.CUR_DIR,LocalResourceType.FILE);}if(!shipArchives.isEmpty()){
fileUploader.registerMultipleLocalResources(
shipArchives.stream().map(e ->newPath(e.toURI())).collect(Collectors.toSet()),Path.CUR_DIR,LocalResourceType.ARCHIVE);}// Upload and register user jarsfinalList<String> userClassPaths = fileUploader.registerMultipleLocalResources(
userJarFiles,
userJarInclusion ==YarnConfigOptions.UserJarInclusion.DISABLED
?ConfigConstants.DEFAULT_FLINK_USR_LIB_DIR
:Path.CUR_DIR,LocalResourceType.FILE);if(userJarInclusion ==YarnConfigOptions.UserJarInclusion.ORDER){
systemClassPaths.addAll(userClassPaths);}// normalize classpath by sortingCollections.sort(systemClassPaths);Collections.sort(userClassPaths);// classpath assemblerStringBuilder classPathBuilder =newStringBuilder();if(userJarInclusion ==YarnConfigOptions.UserJarInclusion.FIRST){for(String userClassPath : userClassPaths){
classPathBuilder.append(userClassPath).append(File.pathSeparator);}}for(String classPath : systemClassPaths){
classPathBuilder.append(classPath).append(File.pathSeparator);}// 多次调用上传 HDFS 的方法,分别是:// => systemShipFiles:日志的配置文件、lib/目录下除了 dist 的 jar 包// => shipOnlyFiles:plugins/目录下的文件// => userJarFiles:用户代码的 jar 包
fileUploader.registerMultipleLocalResources (......);// Setup jar for ApplicationMaster// 上传和配置 ApplicationMaster 的 jar 包:flink-dist*.jarfinalYarnLocalResourceDescriptor localResourceDescFlinkJar = fileUploader.uploadFlinkDist(flinkJarPath);
classPathBuilder.append(localResourceDescFlinkJar.getResourceKey()).append(File.pathSeparator);// write job graph to tmp file and add it to local resource// TODO: server use user main method to generate job graphif(jobGraph !=null){File tmpJobGraphFile =null;try{
tmpJobGraphFile =File.createTempFile(appId.toString(),null);try(FileOutputStream output =newFileOutputStream(tmpJobGraphFile);ObjectOutputStream obOutput =newObjectOutputStream(output)){
obOutput.writeObject(jobGraph);}finalString jobGraphFilename ="job.graph";
configuration.setString(JOB_GRAPH_FILE_PATH, jobGraphFilename);// 将 JobGraph 写入 tmp 文件并添加到本地资源,并上传到 HDFS
fileUploader.registerSingleLocalResource(
jobGraphFilename,newPath(tmpJobGraphFile.toURI()),"",LocalResourceType.FILE,true,false);
classPathBuilder.append(jobGraphFilename).append(File.pathSeparator);}catch(Exception e){
LOG.warn("Add job graph to local resource fail.");throw e;}finally{if(tmpJobGraphFile !=null&&!tmpJobGraphFile.delete()){
LOG.warn("Fail to delete temporary file {}.", tmpJobGraphFile.toPath());}}}// Upload the flink configuration// write out configuration file/*TODO 上传Flink的配置文件 - flink-conf.yaml*/File tmpConfigurationFile =null;try{
tmpConfigurationFile =File.createTempFile(appId +"-flink-conf.yaml",null);BootstrapTools.writeConfiguration(configuration, tmpConfigurationFile);String flinkConfigKey ="flink-conf.yaml";
fileUploader.registerSingleLocalResource(
flinkConfigKey,newPath(tmpConfigurationFile.getAbsolutePath()),"",LocalResourceType.FILE,true,true);
classPathBuilder.append("flink-conf.yaml").append(File.pathSeparator);}finally{if(tmpConfigurationFile !=null&&!tmpConfigurationFile.delete()){
LOG.warn("Fail to delete temporary file {}.", tmpConfigurationFile.toPath());}}if(userJarInclusion ==YarnConfigOptions.UserJarInclusion.LAST){for(String userClassPath : userClassPaths){
classPathBuilder.append(userClassPath).append(File.pathSeparator);}}//To support Yarn Secure Integration Test Scenario//In Integration test setup, the Yarn containers created by YarnMiniCluster does not have the Yarn site XML//and KRB5 configuration files. We are adding these files as container local resources for the container//applications (JM/TMs) to have proper secure cluster setupPath remoteYarnSiteXmlPath =null;if(System.getenv("IN_TESTS")!=null){File f =newFile(System.getenv("YARN_CONF_DIR"),Utils.YARN_SITE_FILE_NAME);
LOG.info("Adding Yarn configuration {} to the AM container local resource bucket", f.getAbsolutePath());Path yarnSitePath =newPath(f.getAbsolutePath());
remoteYarnSiteXmlPath = fileUploader.registerSingleLocalResource(Utils.YARN_SITE_FILE_NAME,
yarnSitePath,"",LocalResourceType.FILE,false,false).getPath();if(System.getProperty("java.security.krb5.conf")!=null){
configuration.set(SecurityOptions.KERBEROS_KRB5_PATH,System.getProperty("java.security.krb5.conf"));}}//上传krb5文件Path remoteKrb5Path =null;boolean hasKrb5 =false;String krb5Config = configuration.get(SecurityOptions.KERBEROS_KRB5_PATH);if(!StringUtils.isNullOrWhitespaceOnly(krb5Config)){finalFile krb5 =newFile(krb5Config);
LOG.info("Adding KRB5 configuration {} to the AM container local resource bucket", krb5.getAbsolutePath());finalPath krb5ConfPath =newPath(krb5.getAbsolutePath());
remoteKrb5Path = fileUploader.registerSingleLocalResource(Utils.KRB5_FILE_NAME,
krb5ConfPath,"",LocalResourceType.FILE,false,false).getPath();
hasKrb5 =true;}Path remotePathKeytab =null;String localizedKeytabPath =null;String keytab = configuration.getString(SecurityOptions.KERBEROS_LOGIN_KEYTAB);if(keytab !=null){boolean localizeKeytab = flinkConfiguration.getBoolean(YarnConfigOptions.SHIP_LOCAL_KEYTAB);
localizedKeytabPath = flinkConfiguration.getString(YarnConfigOptions.LOCALIZED_KEYTAB_PATH);if(localizeKeytab){// Localize the keytab to YARN containers via local resource.
LOG.info("Adding keytab {} to the AM container local resource bucket", keytab);
remotePathKeytab = fileUploader.registerSingleLocalResource(
localizedKeytabPath,newPath(keytab),"",LocalResourceType.FILE,false,false).getPath();}else{// // Assume Keytab is pre-installed in the container.
localizedKeytabPath = flinkConfiguration.getString(YarnConfigOptions.LOCALIZED_KEYTAB_PATH);}}/*TODO jobmanager内存配置*/finalJobManagerProcessSpec processSpec =JobManagerProcessUtils.processSpecFromConfigWithNewOptionToInterpretLegacyHeap(
flinkConfiguration,JobManagerOptions.TOTAL_PROCESS_MEMORY);//封装启动 AM container 的 Java 命令finalContainerLaunchContext amContainer =setupApplicationMasterContainer(
yarnClusterEntrypoint,
hasKrb5,
processSpec);// setup security tokensif(UserGroupInformation.isSecurityEnabled()){// set HDFS delegation tokens when security is enabled
LOG.info("Adding delegation token to the AM container.");List<Path> yarnAccessList =ConfigUtils.decodeListFromConfig(configuration,YarnConfigOptions.YARN_ACCESS,Path::new);Utils.setTokensFor(amContainer,ListUtils.union(yarnAccessList, fileUploader.getRemotePaths()), yarnConfiguration
);}
amContainer.setLocalResources(fileUploader.getRegisteredLocalResources());
fileUploader.close();// Setup CLASSPATH and environment variables for ApplicationMaster/*TODO 创建Map,用来存储 AM的环境变量和类路径*/finalMap<String,String> appMasterEnv =newHashMap<>();// set user specified app master environment variables
appMasterEnv.putAll(ConfigurationUtils.getPrefixedKeyValuePairs(ResourceManagerOptions.CONTAINERIZED_MASTER_ENV_PREFIX, configuration));// set Flink app class path
appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());// set Flink on YARN internal configuration values
appMasterEnv.put(YarnConfigKeys.FLINK_DIST_JAR, localResourceDescFlinkJar.toString());
appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fileUploader.getHomeDir().toString());
appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES,encodeYarnLocalResourceDescriptorListToString(fileUploader.getEnvShipResourceList()));
appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE,getZookeeperNamespace());
appMasterEnv.put(YarnConfigKeys.FLINK_YARN_FILES, fileUploader.getApplicationDir().toUri().toString());// https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name
appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME,UserGroupInformation.getCurrentUser().getUserName());if(localizedKeytabPath !=null){
appMasterEnv.put(YarnConfigKeys.LOCAL_KEYTAB_PATH, localizedKeytabPath);String principal = configuration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL);
appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal);if(remotePathKeytab !=null){
appMasterEnv.put(YarnConfigKeys.REMOTE_KEYTAB_PATH, remotePathKeytab.toString());}}//To support Yarn Secure Integration Test Scenarioif(remoteYarnSiteXmlPath !=null){
appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString());}if(remoteKrb5Path !=null){
appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString());}// set classpath from YARN configurationUtils.setupYarnClassPath(yarnConfiguration, appMasterEnv);/*TODO 将之前封装的 Map(AM的环境信息、类路径),设置到容器里*/
amContainer.setEnvironment(appMasterEnv);// Set up resource type requirements for ApplicationMasterResource capability =Records.newRecord(Resource.class);
capability.setMemory(clusterSpecification.getMasterMemoryMB());
capability.setVirtualCores(flinkConfiguration.getInteger(YarnConfigOptions.APP_MASTER_VCORES));finalString customApplicationName = customName !=null? customName : applicationName;
appContext.setApplicationName(customApplicationName);
appContext.setApplicationType(applicationType !=null? applicationType :"Apache Flink");
appContext.setAMContainerSpec(amContainer);
appContext.setResource(capability);// Set priority for applicationint priorityNum = flinkConfiguration.getInteger(YarnConfigOptions.APPLICATION_PRIORITY);if(priorityNum >=0){Priority priority =Priority.newInstance(priorityNum);
appContext.setPriority(priority);}if(yarnQueue !=null){
appContext.setQueue(yarnQueue);}setApplicationNodeLabel(appContext);setApplicationTags(appContext);// add a hook to clean up in case deployment failsThread deploymentFailureHook =newDeploymentFailureHook(yarnApplication, fileUploader.getApplicationDir());Runtime.getRuntime().addShutdownHook(deploymentFailureHook);
LOG.info("Submitting application master "+ appId);/*TODO 前面做了很多上传、环境配置,终于可以提交应用了*/
yarnClient.submitApplication(appContext);
LOG.info("Waiting for the cluster to be allocated");finallong startTime =System.currentTimeMillis();ApplicationReport report;YarnApplicationState lastAppState =YarnApplicationState.NEW;
loop:while(true){try{
report = yarnClient.getApplicationReport(appId);}catch(IOException e){thrownewYarnDeploymentException("Failed to deploy the cluster.", e);}YarnApplicationState appState = report.getYarnApplicationState();
LOG.debug("Application State: {}", appState);switch(appState){case FAILED:case KILLED:thrownewYarnDeploymentException("The YARN application unexpectedly switched to state "+ appState +" during deployment. \n"+"Diagnostics from YARN: "+ report.getDiagnostics()+"\n"+"If log aggregation is enabled on your cluster, use this command to further investigate the issue:\n"+"yarn logs -applicationId "+ appId);//break ..case RUNNING:
LOG.info("YARN application has been deployed successfully.");break loop;case FINISHED:
LOG.info("YARN application has been finished successfully.");break loop;default:if(appState != lastAppState){
LOG.info("Deploying cluster, current state "+ appState);}if(System.currentTimeMillis()- startTime >60000){
LOG.info("Deployment took more than 60 seconds. Please check if the requested resources are available in the YARN cluster");}}
lastAppState = appState;Thread.sleep(250);}// since deployment was successful, remove the hookShutdownHookUtil.removeShutdownHook(deploymentFailureHook,getClass().getSimpleName(), LOG);return report;}// 多次调用上传 HDFS 的方法,分别是:// => systemShipFiles:日志的配置文件、lib/目录下除了 dist 的 jar 包// => shipOnlyFiles:plugins/目录下的文件// => userJarFiles:用户代码的 jar 包/*
*fileuploader注册:krb,yarn-site,flink-conf.yaml,jobGraph,jar包和依赖lib路径,包含配置的shipFiles
*fileUploader发送:flinkJarPath和一些perjob模式需要的本地文件、注册本地文件,上传一些本地jar包路径和一些本地文件,封装配置appContext、shipfiles
*classPathBuilder包含了一堆类路径,classPathBuilder封装到appMasterEnv,appMasterEnv要拿yarn配置和fileUploader里面的一些东西,appMasterEnv最后封装到amContainer,amContainer封装到amContext
*
*appContext:应用提交的上下文,包含amContainer;高可用配置--重试次数,默认2次;ApplicationName;ApplicationType;资源容量信息;优先级;队列
*amContainer:AppMaster容器,包含yarnClusterEntrypoint(yarn集群入口类名YarnJobClusterEntryPoint--main);processSpec(JM内存配置);从fileUploader获取的本地资源(fileUploader注册的东西);appMasterEnv
*appMasterEnv:appMaster的环境配置,包含AM的环境变量和类路径,还包含classPathBuilder,classPathBuilder包含了一堆类路径
*fileUploader:注册本地文件,上传一些本地jar包路径和一些本地文件,封装配置appContext、sysShipFiles(包含创建描述器时赋值的shipFiles)*///总结来说就是封装了所有的配置属性,注册了所有的本地文件和类路径,往hdfs里面发了需要的本地文件,上传用户的jar包和依赖,然后弄到容器、上下文里,调用yarn的方法来提交应用
//接【封装启动 AM container 的 Java 命令】// final ContainerLaunchContext amContainer = setupApplicationMasterContainer(/ContainerLaunchContextsetupApplicationMasterContainer(String yarnClusterEntrypoint,boolean hasKrb5,JobManagerProcessSpec processSpec){// ------------------ Prepare Application Master Container ------------------------------// respect custom JVM options in the YAML fileString javaOpts = flinkConfiguration.getString(CoreOptions.FLINK_JVM_OPTIONS);if(flinkConfiguration.getString(CoreOptions.FLINK_JM_JVM_OPTIONS).length()>0){
javaOpts +=" "+ flinkConfiguration.getString(CoreOptions.FLINK_JM_JVM_OPTIONS);}//krb5.conf file will be available as local resource in JM/TM containerif(hasKrb5){
javaOpts +=" -Djava.security.krb5.conf=krb5.conf";}// Set up the container launch context for the application masterContainerLaunchContext amContainer =Records.newRecord(ContainerLaunchContext.class);finalMap<String,String> startCommandValues =newHashMap<>();
startCommandValues.put("java","$JAVA_HOME/bin/java");String jvmHeapMem =JobManagerProcessUtils.generateJvmParametersStr(processSpec, flinkConfiguration);
startCommandValues.put("jvmmem", jvmHeapMem);
startCommandValues.put("jvmopts", javaOpts);
startCommandValues.put("logging",YarnLogConfigUtil.getLoggingYarnCommand(flinkConfiguration));
startCommandValues.put("class", yarnClusterEntrypoint);
startCommandValues.put("redirects","1> "+ApplicationConstants.LOG_DIR_EXPANSION_VAR +"/jobmanager.out "+"2> "+ApplicationConstants.LOG_DIR_EXPANSION_VAR +"/jobmanager.err");String dynamicParameterListStr =JobManagerProcessUtils.generateDynamicConfigsStr(processSpec);
startCommandValues.put("args", dynamicParameterListStr);finalString commandTemplate = flinkConfiguration
.getString(ConfigConstants.YARN_CONTAINER_START_COMMAND_TEMPLATE,ConfigConstants.DEFAULT_YARN_CONTAINER_START_COMMAND_TEMPLATE);finalString amCommand =BootstrapTools.getStartCommand(commandTemplate, startCommandValues);
amContainer.setCommands(Collections.singletonList(amCommand));
LOG.debug("Application Master start command: "+ amCommand);return amContainer;}//拿Java环境变量配置和一些jvm选项,创建包含启动命令的amContainer
// 2、向yarn提交任务 (Step 2: submit the application to YARN)
//YarnClientImpl.java@OverridepublicApplicationIdsubmitApplication(ApplicationSubmissionContext appContext)throwsYarnException,IOException{ApplicationId applicationId = appContext.getApplicationId();if(applicationId ==null){thrownewApplicationIdNotProvidedException("ApplicationId is not provided in ApplicationSubmissionContext");}SubmitApplicationRequest request =Records.newRecord(SubmitApplicationRequest.class);
request.setApplicationSubmissionContext(appContext);//这里是提交的核心逻辑:TODO: YARN-1763:Handle RM failovers during the submitApplication call.
rmClient.submitApplication(request);int pollCount =0;long startTime =System.currentTimeMillis();while(true){try{YarnApplicationState state =getApplicationReport(applicationId).getYarnApplicationState();if(!state.equals(YarnApplicationState.NEW)&&!state.equals(YarnApplicationState.NEW_SAVING)){
LOG.info("Submitted application "+ applicationId);break;}long elapsedMillis =System.currentTimeMillis()- startTime;if(enforceAsyncAPITimeout()&&
elapsedMillis >= asyncApiPollTimeoutMillis){thrownewYarnException("Timed out while waiting for application "+
applicationId +" to be submitted successfully");}// Notify the client through the log every 10 poll, in case the client// is blocked here too long.if(++pollCount %10==0){
LOG.info("Application submission is not finished, "+"submitted application "+ applicationId +" is still in "+ state);}try{Thread.sleep(submitPollIntervalMillis);}catch(InterruptedException ie){
LOG.error("Interrupted while waiting for application "+ applicationId
+" to be successfully submitted.");}}catch(ApplicationNotFoundException ex){// FailOver or RM restart happens before RMStateStore saves// ApplicationState
LOG.info("Re-submit application "+ applicationId +"with the "+"same ApplicationSubmissionContext");
rmClient.submitApplication(request);}}return applicationId;}