flink源码分析1-1--yarnPerJob任务提交流程--启动命令解析&读配置&封装

flink源码分析1-1–yarnPerJob任务提交流程–启动命令解析&读配置&封装

1、命令行:提交任务命令
##提交任务的命令行
./bin/flink run -m yarn-cluster -ynm liufen_contry_jzhk -ys 5 --parallelism 2 -yD yarn.containers.vcores=1 -c app.JZHK ./liufen_contry-jzhk.jar


##根据命令行,可以知道提交flink任务是启动了一个名为flink的脚本,然后后面跟了一堆参数
# Add Client-specific JVM options
FLINK_ENV_JAVA_OPTS="${FLINK_ENV_JAVA_OPTS} ${FLINK_ENV_JAVA_OPTS_CLI}"


#####这句代码是真正执行入口Java类的代码--CliFrontend,前面是读取配置和解析的逻辑
# Add HADOOP_CLASSPATH to allow the usage of Hadoop file systems
exec $JAVA_RUN $JVM_ARGS $FLINK_ENV_JAVA_OPTS "${log_setting[@]}" -classpath "`manglePathList "$CC_CLASSPATH:$INTERNAL_HADOOP_CLASSPATHS"`" org.apache.flink.client.cli.CliFrontend "$@"


2、入口Java类:CliFrontend
/*
 *当前类:org.apache.flink.client.cli.CliFrontend
 */
    /**
     * Entry point of the command-line client.
     *
     * <p>Resolves the configuration directory, loads the global configuration from it,
     * builds the list of custom command lines, and then runs {@link #parseAndRun(String[])}
     * inside the installed security context. The JVM exits with the code returned by
     * {@code parseAndRun}, or with 31 if anything is thrown along the way.
     *
     * @param args raw command-line arguments; the first one is interpreted as the action
     */
    public static void main(final String[] args) {
		EnvironmentInformation.logEnvironmentInfo(LOG, "Command Line Client", args);

		// Step 1: locate the Flink conf directory.
		final String confDir = getConfigurationDirectoryFromEnv();

		// Step 2: read the global configuration found in that directory.
		final Configuration globalConf = GlobalConfiguration.loadConfiguration(confDir);

		// Step 3: assemble the custom command lines from the configuration and its directory.
		final List<CustomCommandLine> commandLines = loadCustomCommandLines(globalConf, confDir);

		try {
			final CliFrontend frontend = new CliFrontend(globalConf, commandLines);

			// Install the security context derived from the frontend's configuration
			// before any action is executed.
			SecurityUtils.install(new SecurityConfiguration(frontend.configuration));

			// Parse the arguments and run the requested action inside the security context.
			final int exitCode = SecurityUtils.getInstalledContext()
					.runSecured(() -> frontend.parseAndRun(args));
			System.exit(exitCode);
		} catch (Throwable t) {
			// Unwrap UndeclaredThrowableException so the log shows the underlying failure.
			final Throwable rootFailure = ExceptionUtils.stripException(t, UndeclaredThrowableException.class);
			LOG.error("Fatal error while running command line interface.", rootFailure);
			rootFailure.printStackTrace();
			System.exit(31);
		}
	}
/*
 *当前类:org.apache.flink.client.cli.CliFrontend
 *Clifrontend.main()->cli.parseAndRun(args)
 */
	/**
	 * Interprets the first argument as the action and dispatches to the matching handler.
	 *
	 * <p>Returns 0 after a recognised action (or the help/version output) completes, and 1
	 * when no action was given or the action is not recognised. Exceptions thrown by a
	 * handler are passed to the corresponding {@code handle...} method, whose return value
	 * becomes the result.
	 *
	 * @param args command-line arguments; {@code args[0]} is the action, the rest are its parameters
	 * @return the exit code for the process
	 */
	public int parseAndRun(String[] args) {

		// check for action
		if (args.length < 1) {
			CliFrontendParser.printHelp(customCommandLines);
			System.out.println("Please specify an action.");
			return 1;
		}

		// get action: the first argument that was passed in is treated as the action
		String action = args[0];

		// remove action from parameters (the action has been consumed; only the remaining
		// arguments are handed on to the action handler such as run)
		final String[] params = Arrays.copyOfRange(args, 1, args.length);

		try {
			// do action
			switch (action) {
				case ACTION_RUN:
                    // For "flink run ..." the first argument is "run", which selects this branch.
					run(params);
					return 0;
				case ACTION_RUN_APPLICATION:
					runApplication(params);
					return 0;
				case ACTION_LIST:
					list(params);
					return 0;
                // NOTE(review): the marker below is the article's elision of the remaining
                // action cases; it is not Java and this excerpt does not compile as shown.
                ……………………
				case "-h":
				case "--help":
					CliFrontendParser.printHelp(customCommandLines);
					return 0;
				case "-v":
				case "--version":
					String version = EnvironmentInformation.getVersion();
					String commitID = EnvironmentInformation.getRevisionInformation().commitId;
					System.out.print("Version: " + version);
					// Append the commit id only when it is known.
					System.out.println(commitID.equals(EnvironmentInformation.UNKNOWN) ? "" : ", Commit ID: " + commitID);
					return 0;
				default:
					System.out.printf("\"%s\" is not a valid action.\n", action);
					System.out.println();
					System.out.println("Valid actions are \"run\", \"list\", \"info\", \"savepoint\", \"stop\", or \"cancel\".");
					System.out.println();
					System.out.println("Specify the version option (-v or --version) to print Flink version.");
					System.out.println();
					System.out.println("Specify the help option (-h or --help) to get help on the command.");
					return 1;
			}
		} catch (CliArgsException ce) {
			return handleArgException(ce);
		} catch (ProgramParametrizationException ppe) {
			return handleParametrizationException(ppe);
		} catch (ProgramMissingJobException pmje) {
			return handleMissingJobException();
		} catch (Exception e) {
			return handleError(e);
		}
	}
/*
 *当前类:org.apache.flink.client.cli.CliFrontend
 *Clifrontend.main(args)->cli.parseAndRun(args)->run
 */
	/**
	 * Executes the {@code run} action.
	 *
	 * <p>Parses the arguments, returns early after printing help if the help flag is set,
	 * selects the active custom command line, derives the program options, job jars and
	 * effective configuration, packages the program and executes it. Extracted libraries
	 * of the packaged program are deleted afterwards regardless of the outcome.
	 *
	 * @param args the arguments that follow the action on the command line
	 * @throws Exception whatever parsing, packaging or executing the program throws
	 */
	protected void run(String[] args) throws Exception {
		LOG.info("Running 'run' command.");

		// Option definitions for the run action, then the user's arguments parsed against them.
		final Options runOptions = CliFrontendParser.getRunCommandOptions();
		final CommandLine parsedLine = getCommandLine(runOptions, args, true);

		// A help request short-circuits everything else.
		if (parsedLine.hasOption(HELP_OPTION.getOpt())) {
			CliFrontendParser.printHelpForRun(customCommandLines);
			return;
		}

		// Pick the first custom command line that reports itself active for this input.
		final CustomCommandLine activeCli = validateAndGetActiveCommandLine(checkNotNull(parsedLine));

		// Program-level options (entry class, jar path, parallelism, ...) taken from the parsed line.
		final ProgramOptions options = ProgramOptions.create(parsedLine);

		// The user jar plus the dependencies that ship with it.
		final List<URL> dependencies = getJobJarAndDependencies(options);

		// Configuration that results from combining the loaded settings with this invocation.
		final Configuration effectiveConf =
				getEffectiveConfiguration(activeCli, parsedLine, options, dependencies);
		LOG.debug("Effective executor configuration: {}", effectiveConf);

		// Bundle the program options and the configuration into a PackagedProgram and run it.
		final PackagedProgram packagedProgram = getPackagedProgram(options, effectiveConf);
		try {
			executeProgram(effectiveConf, packagedProgram);
		} finally {
			packagedProgram.deleteExtractedLibraries();
		}
	}
/*TODO 根据之前添加的顺序,挨个判断是否active:Generic、Yarn、Default*/
		final CustomCommandLine activeCommandLine =
				validateAndGetActiveCommandLine(checkNotNull(commandLine));
/*TODO “之前添加的顺序”指的是 CliFrontend.main() 中下面这次调用所构建的列表顺序:
 *   final List<CustomCommandLine> customCommandLines =
 *       loadCustomCommandLines(configuration, configurationDirectory);
 */
	/**
	 * Builds the ordered list of custom command lines: the generic CLI first, then the YARN
	 * session CLI (or its fallback), and the default CLI last.
	 *
	 * <p>If the YARN session CLI cannot be loaded, the fallback CLI is tried instead. If that
	 * fails as well, a warning carrying the original failure is logged and no YARN entry is added.
	 *
	 * @param configuration the loaded global configuration
	 * @param configurationDirectory the directory the configuration was loaded from
	 * @return the custom command lines in the order in which they are checked for activity
	 */
	public static List<CustomCommandLine> loadCustomCommandLines(Configuration configuration, String configurationDirectory) {
		final List<CustomCommandLine> result = new ArrayList<>();
		result.add(new GenericCLI(configuration, configurationDirectory));

		// The YARN session CLI is initialised specially so that all of its options
		// carry the y/yarn prefix.
		final String yarnCliClass = "org.apache.flink.yarn.cli.FlinkYarnSessionCli";
		try {
			final CustomCommandLine yarnCli = loadCustomCommandLine(
				yarnCliClass,
				configuration,
				configurationDirectory,
				"y",
				"yarn");
			result.add(yarnCli);
		} catch (NoClassDefFoundError | Exception primaryFailure) {
			final String fallbackCliClass = "org.apache.flink.yarn.cli.FallbackYarnSessionCli";
			try {
				LOG.info("Loading FallbackYarnSessionCli");
				final CustomCommandLine fallbackCli = loadCustomCommandLine(fallbackCliClass, configuration);
				result.add(fallbackCli);
			} catch (Exception fallbackFailure) {
				// The warning reports the primary class and the primary failure,
				// not the fallback's own failure.
				LOG.warn("Could not load CLI class {}.", yarnCliClass, primaryFailure);
			}
		}

		// DefaultCLI has to come last: the active command line is looked up in list order
		// and DefaultCLI always reports itself as active.
		result.add(new DefaultCLI());

		return result;
	}
//现在回到run方法当中:validateAndGetActiveCommandLine()

/*
 *当前类:org.apache.flink.client.cli.CliFrontend
 *Clifrontend.main(args)->cli.parseAndRun(args)->run->validateAndGetActiveCommandLine()
 */
	/**
	 * Returns the first custom command line that reports itself active for the given input.
	 *
	 * <p>The candidates are checked in the order of {@code customCommandLines}. Each
	 * candidate's {@code isActive} is evaluated exactly once, so the value written to the
	 * debug log is always the value the decision is based on.
	 *
	 * @param commandLine the parsed user command line
	 * @return the first active custom command line
	 * @throws IllegalStateException if no candidate is active
	 */
	public CustomCommandLine validateAndGetActiveCommandLine(CommandLine commandLine) {
		LOG.debug("Custom commandlines: {}", customCommandLines);
		for (CustomCommandLine cli : customCommandLines) {
			// Evaluate once and reuse for both logging and the decision.
			final boolean active = cli.isActive(commandLine);
			LOG.debug("Checking custom commandline {}, isActive: {}", cli, active);
			if (active) {
				return cli;
			}
		}
		throw new IllegalStateException("No valid command-line found.");
	}

/*
 *当前类:org.apache.flink.yarn.cli.AbstractYarnCli
 *Clifrontend.main(args)->cli.parseAndRun(args)->run->validateAndGetActiveCommandLine()->CustomCommandLine.isActive(commandLine)->FlinkYarnSessionCli.isActive(commandLine)->AbstractYarnCli.isActive(commandLine)
 */
	/**
	 * Decides whether the YARN command line applies to the given input.
	 *
	 * <p>It is active when any of the following holds: the address option equals {@code ID},
	 * an application id is given on the command line or present in the configuration, or
	 * the configured deployment target names the YARN session or YARN job cluster executor.
	 *
	 * @param commandLine the parsed user command line
	 * @return {@code true} if this command line should handle the input
	 */
	public boolean isActive(CommandLine commandLine) {
		// Address option equal to ID (per the original author's note, the literal "yarn-cluster").
		final String addressValue = commandLine.getOptionValue(addressOption.getOpt(), null);
		final boolean addressedToYarn = ID.equals(addressValue);

		// An application id supplied either on the command line or through the configuration.
		final boolean appIdKnown = commandLine.hasOption(applicationId.getOpt())
			|| configuration.getOptional(YarnConfigOptions.APPLICATION_ID).isPresent();

		// The configured deployment target names one of the two YARN executors (case-insensitive).
		final String executorTarget = configuration.get(DeploymentOptions.TARGET);
		final boolean yarnExecutorConfigured = YarnSessionClusterExecutor.NAME.equalsIgnoreCase(executorTarget)
			|| YarnJobClusterExecutor.NAME.equalsIgnoreCase(executorTarget);

		return yarnExecutorConfigured || addressedToYarn || appIdKnown;
	}
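//validateAndGetActiveCommandLine()方法会按添加顺序(Generic、Yarn、Default)遍历main方法中用configuration和配置文件路径封装出来的customCommandLines列表,对每一个cli对象调用isActive(commandLine),根据解析出来的commandLine对象判断该命令行接口是否活跃,并返回第一个活跃的接口。注意并不是“总是优先返回yarn模式”:GenericCLI排在最前面,只是本例的命令(-m yarn-cluster,没有-e/-t,也没有配置execution.target)不会激活它,所以最终返回的是yarn对应的FlinkYarnSessionCli;DefaultCLI的isActive总是返回true,因此放在最后兜底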
//final修饰的customCommandLine和后来传入的commandLine之间的关系:customCommandLine这个是main方法中构建final CliFrontend cli = new CliFrontend()直接赋值的,它是根据配置文件,按顺序封装了Generic,yarn和default几个命令行接口,commandline是解析的用户输入的命令行

//此时再次回到run方法当中:Clifrontend.run()
//final ProgramOptions programOptions = ProgramOptions.create(commandLine);
/*
 *当前类:org.apache.flink.client.cli.ProgramOptions
 *Clifrontend.main(args)->cli.parseAndRun(args)->run->ProgramOptions.create(commandLine)->new ProgramOptions(line)
 */
//接CliFrontend的235行,调用create方法,先判断是否是Python模式,不是则继续调ProgramOptions(line),这个line是上一步传进来的解析后的命令行对象
	/**
	 * Creates the program options for a parsed command line.
	 *
	 * <p>Python program options are produced when the line has a Python entry point or
	 * contains Python dependency options; otherwise plain {@code ProgramOptions} are built.
	 *
	 * @param line the parsed user command line
	 * @return the program options matching the kind of program being submitted
	 * @throws CliArgsException if the options on the line are invalid
	 */
	public static ProgramOptions create(CommandLine line) throws CliArgsException {
		// Common case first: nothing Python-related on the line.
		if (!isPythonEntryPoint(line) && !containsPythonDependencyOptions(line)) {
			return new ProgramOptions(line);
		}
		return createPythonProgramOptions(line);
	}

	//接本类179行,开始创建执行选项对象:(后面获取jar包和依赖的时候也会获取entrypoint值和jarfilepath值)
	/**
	 * Reads the program-level options from a parsed command line.
	 *
	 * <p>Populates the entry point class, jar file path, program arguments, user classpath
	 * URLs, parallelism, detached/shutdown flags and savepoint restore settings.
	 *
	 * @param line the parsed user command line
	 * @throws CliArgsException if a classpath entry is not a valid URL, or if the parallelism
	 *     is not a positive integer; when the problem originates from a parsing exception,
	 *     that exception is attached as the cause
	 */
	protected ProgramOptions(CommandLine line) throws CliArgsException {
		super(line);

		// Entry point class given via the class option; null when the option is absent.
		this.entryPointClass = line.hasOption(CLASS_OPTION.getOpt()) ?
			line.getOptionValue(CLASS_OPTION.getOpt()) : null;

		// Jar file given via the jar option; null when the option is absent.
		this.jarFilePath = line.hasOption(JAR_OPTION.getOpt()) ?
			line.getOptionValue(JAR_OPTION.getOpt()) : null;

		this.programArgs = extractProgramArgs(line);

		// Additional classpath entries; each one has to be a well-formed URL.
		List<URL> classpaths = new ArrayList<>();
		if (line.hasOption(CLASSPATH_OPTION.getOpt())) {
			for (String path : line.getOptionValues(CLASSPATH_OPTION.getOpt())) {
				try {
					classpaths.add(new URL(path));
				} catch (MalformedURLException e) {
					// Keep the cause so the reason the URL was rejected is not lost.
					throw new CliArgsException("Bad syntax for classpath: " + path, e);
				}
			}
		}
		this.classpaths = classpaths;

		if (line.hasOption(PARALLELISM_OPTION.getOpt())) {
			String parString = line.getOptionValue(PARALLELISM_OPTION.getOpt());
			final int parsedParallelism;
			try {
				parsedParallelism = Integer.parseInt(parString);
			}
			catch (NumberFormatException e) {
				throw new CliArgsException("The parallelism must be a positive number: " + parString, e);
			}
			// Zero and negative values are rejected with the same message as unparsable input.
			if (parsedParallelism <= 0) {
				throw new CliArgsException("The parallelism must be a positive number: " + parString);
			}
			parallelism = parsedParallelism;
		}
		else {
			parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
		}

		// Detached mode can be requested through either of the two detached options.
		detachedMode = line.hasOption(DETACHED_OPTION.getOpt()) || line.hasOption(YARN_DETACHED_OPTION.getOpt());
		shutdownOnAttachedExit = line.hasOption(SHUTDOWN_IF_ATTACHED_OPTION.getOpt());

		this.savepointSettings = CliFrontendParser.createSavepointRestoreSettings(line);
	}

//此时再次回到run方法当中:Clifrontend.run()
//final List<URL> jobJars = getJobJarAndDependencies(programOptions);
	/**
	 * Collects the job jar and its dependencies as described by the program options.
	 *
	 * @param programOptions supplies the entry point class name and the jar file path
	 * @return URLs of the job jar and the libraries it needs
	 * @throws CliArgsException if the jar file cannot be found or the program cannot be
	 *     inspected; the underlying exception is attached as the cause
	 */
	private List<URL> getJobJarAndDependencies(ProgramOptions programOptions) throws CliArgsException {
		final String mainClassName = programOptions.getEntryPointClassName();
		final String jarPath = programOptions.getJarFilePath();

		try {
			// Without a jar path there is no file to resolve; null is passed on in that case.
			File jobJar = null;
			if (jarPath != null) {
				jobJar = getJarFile(jarPath);
			}
			return PackagedProgram.getJobJarAndDependencies(jobJar, mainClassName);
		} catch (FileNotFoundException | ProgramInvocationException cause) {
			throw new CliArgsException(
				"Could not get job jar and dependencies from JAR file: " + cause.getMessage(), cause);
		}
	}
/*
 *当前类:org.apache.flink.client.program.PackagedProgram
 *Clifrontend.main(args)->cli.parseAndRun(args)->run->getJobJarAndDependencies(programOptions)->PackagedProgram.getJobJarAndDependencies(jarFile, entryPointClass)
 */
	/**
	 * Returns the URLs of all libraries needed to run the program: the job jar itself (when
	 * one is given), the libraries extracted from it, and the Python jar when the entry
	 * point is a Python one.
	 *
	 * @param jarFile the job jar
	 * @param entryPointClassName the entry point class name, may be {@code null}
	 * @return the job jar URL followed by the URLs of the extracted libraries
	 * @throws ProgramInvocationException if loading the jar or extracting its libraries fails
	 */
	public static List<URL> getJobJarAndDependencies(File jarFile, @Nullable String entryPointClassName) throws ProgramInvocationException {
		final URL jobJarUrl = loadJarFile(jarFile);

		// Libraries are only extracted when there is a jar URL to extract them from.
		final List<File> nestedLibs;
		if (jobJarUrl == null) {
			nestedLibs = Collections.emptyList();
		} else {
			nestedLibs = extractContainedLibraries(jobJarUrl);
		}

		// Sized for the extracted libraries plus the job jar itself.
		final List<URL> result = new ArrayList<>(nestedLibs.size() + 1);
		if (jobJarUrl != null) {
			result.add(jobJarUrl);
		}

		for (File nestedLib : nestedLibs) {
			final URL nestedLibUrl;
			try {
				nestedLibUrl = nestedLib.getAbsoluteFile().toURI().toURL();
			} catch (MalformedURLException e) {
				throw new RuntimeException("URL is invalid. This should not happen.", e);
			}
			result.add(nestedLibUrl);
		}

		if (isPython(entryPointClassName)) {
			result.add(PackagedProgramUtils.getPythonJar());
		}

		return result;
	}

//此时再次回到run方法当中:Clifrontend.run()
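//final Configuration effectiveConfiguration = getEffectiveConfiguration(activeCommandLine, commandLine, programOptions, jobJars);
//注意:run()中调用的是上面这个四个参数的重载;下面贴出的是它内部首先调用的两个参数的重载getEffectiveConfiguration(activeCustomCommandLine, commandLine),负责把全局配置和命令行配置合并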
	/**
	 * Combines the frontend's configuration with the settings the active custom command
	 * line derives from the parsed command line.
	 *
	 * <p>The result starts as a copy of {@code configuration}; the command-line settings are
	 * then added on top of it.
	 *
	 * @param activeCustomCommandLine the custom command line selected for this invocation; must not be null
	 * @param commandLine the parsed user command line
	 * @return a new configuration holding both sets of settings
	 * @throws FlinkException if the command line cannot be converted into a configuration
	 */
	private <T> Configuration getEffectiveConfiguration(
			final CustomCommandLine activeCustomCommandLine,
			final CommandLine commandLine) throws FlinkException {

		// Work on a copy so the frontend's own configuration stays untouched.
		final Configuration merged = new Configuration(configuration);
		merged.addAll(checkNotNull(activeCustomCommandLine).toConfiguration(commandLine));
		return merged;
	}
/*
 *当前类:org.apache.flink.yarn.cli.FlinkYarnSessionCli
 *CliFrontend.main(args)->cli.parseAndRun(args)->run->getEffectiveConfiguration(activeCommandLine, commandLine, programOptions, jobJars)->getEffectiveConfiguration(activeCommandLine, commandLine)->FlinkYarnSessionCli.toConfiguration(commandLine)
 */
	/**
	 * Translates the YARN-related command-line options into a fresh configuration.
	 *
	 * <p>Sets the deployment target (session executor when an application id is resolved,
	 * job cluster executor otherwise), the JobManager/TaskManager process memory, the number
	 * of task slots and any dynamic properties. Settings are applied in that order, so later
	 * ones overwrite earlier ones for the same key. As a side effect the encoded dynamic
	 * properties are stored in {@code dynamicPropertiesEncoded}.
	 *
	 * @param commandLine the parsed user command line
	 * @return the resulting configuration, passed through {@code applyYarnProperties} when
	 *     the command line is in YARN properties file mode
	 * @throws FlinkException if the options cannot be converted
	 */
	@Override
	public Configuration toConfiguration(CommandLine commandLine) throws FlinkException {
		// we ignore the addressOption because it can only contain "yarn-cluster"
		final Configuration result = new Configuration();

		applyDescriptorOptionToConfig(commandLine, result);

		// TARGET is the deployment target; it determines which kind of executor
		// (YARN session or YARN per-job) submits the job later on.
		final ApplicationId yarnAppId = getApplicationId(commandLine);
		if (yarnAppId == null) {
			result.setString(DeploymentOptions.TARGET, YarnJobClusterExecutor.NAME);
		} else {
			// An explicit namespace option wins; otherwise keep an already configured
			// cluster id and fall back to the application id.
			final String namespaceOpt = zookeeperNamespace.getOpt();
			final String haClusterId = commandLine.hasOption(namespaceOpt)
				? commandLine.getOptionValue(namespaceOpt)
				: result.getString(HA_CLUSTER_ID, yarnAppId.toString());

			result.setString(HA_CLUSTER_ID, haClusterId);
			result.setString(YarnConfigOptions.APPLICATION_ID, ConverterUtils.toString(yarnAppId));
			result.setString(DeploymentOptions.TARGET, YarnSessionClusterExecutor.NAME);
		}

		// Memory values given without a unit get "m" appended before parsing.
		final String jmMemoryOpt = jmMemory.getOpt();
		if (commandLine.hasOption(jmMemoryOpt)) {
			final String rawJmMemory = commandLine.getOptionValue(jmMemoryOpt);
			final String jmMemoryWithUnit =
				MemorySize.MemoryUnit.hasUnit(rawJmMemory) ? rawJmMemory : rawJmMemory + "m";
			result.set(JobManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.parse(jmMemoryWithUnit));
		}

		final String tmMemoryOpt = tmMemory.getOpt();
		if (commandLine.hasOption(tmMemoryOpt)) {
			final String rawTmMemory = commandLine.getOptionValue(tmMemoryOpt);
			final String tmMemoryWithUnit =
				MemorySize.MemoryUnit.hasUnit(rawTmMemory) ? rawTmMemory : rawTmMemory + "m";
			result.set(TaskManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.parse(tmMemoryWithUnit));
		}

		final String slotsOpt = slots.getOpt();
		if (commandLine.hasOption(slotsOpt)) {
			final int slotCount = Integer.parseInt(commandLine.getOptionValue(slotsOpt));
			result.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, slotCount);
		}

		// Dynamic properties are applied last and therefore override the settings above.
		dynamicPropertiesEncoded = encodeDynamicProperties(commandLine);
		if (!dynamicPropertiesEncoded.isEmpty()) {
			final Map<String, String> decoded = getDynamicProperties(dynamicPropertiesEncoded);
			for (Map.Entry<String, String> property : decoded.entrySet()) {
				result.setString(property.getKey(), property.getValue());
			}
		}

		return isYarnPropertiesFileMode(commandLine)
			? applyYarnProperties(result)
			: result;
	}


//此时再次回到run方法当中:Clifrontend.run()
//executeProgram(effectiveConfiguration, program);至此,各项配置属性和执行选项包括jar包类路径都已经加载完毕,准备执行用户的main方法。
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

luyunlong_it

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值