文章目录
针对flink-1.7-release
1. 引言
Flink客户端通过命令行的形式提交任务时,即执行./flink run xxxx.jar
时,真正执行的run逻辑如下:
/**
 * Executes the "run" action: parses the command line, builds the packaged program
 * from the specified JAR file and hands it over to {@code runProgram}.
 *
 * @param args Command line arguments for the run action.
 * @throws Exception Propagated from option parsing, program construction or execution;
 *                   a {@code CliArgsException} is thrown when no JAR file path was given
 *                   or the JAR file could not be found.
 */
protected void run(String[] args) throws Exception {
	LOG.info("Running 'run' command.");

	final Options commandOptions = CliFrontendParser.getRunCommandOptions();
	final Options commandLineOptions = CliFrontendParser.mergeOptions(commandOptions, customCommandLineOptions);
	final CommandLine commandLine = CliFrontendParser.parse(commandLineOptions, args, true);

	// options specific to the run action
	final RunOptions runOptions = new RunOptions(commandLine);

	// only print the help text if that is what was requested
	if (runOptions.isPrintHelp()) {
		CliFrontendParser.printHelpForRun(customCommandLines);
		return;
	}

	// the path of the job JAR is mandatory
	if (runOptions.getJarFilePath() == null) {
		throw new CliArgsException("The program JAR file was not specified.");
	}

	// build the packaged program that wraps the job JAR
	final PackagedProgram program;
	try {
		LOG.info("Building program from JAR file");
		program = buildProgram(runOptions);
	}
	catch (FileNotFoundException e) {
		throw new CliArgsException("Could not build the program from JAR file.", e);
	}

	try {
		// Resolved inside the guarded region: the program already exists at this point,
		// so a failure here must still trigger the cleanup in the finally block.
		final CustomCommandLine<?> customCommandLine = getActiveCustomCommandLine(commandLine);

		// execute the program
		runProgram(customCommandLine, commandLine, runOptions, program);
	} finally {
		program.deleteExtractedLibraries();
	}
}
这段逻辑主要做了两件事情:首先构建任务程序包(PackagedProgram),然后执行该程序包。本文重点分析 program = buildProgram(runOptions); 这段逻辑,即怎样通过run的命令行参数来构建程序包。
2. buildProgram的执行逻辑
继续分析 buildProgram(runOptions)的逻辑:
/**
 * Creates a packaged program from the given command line options.
 *
 * @param options Program options carrying the JAR path, program arguments, classpaths,
 *                optional entry point class name and savepoint restore settings.
 * @return A PackagedProgram (upon success)
 * @throws FileNotFoundException Thrown, if the JAR path does not exist or is not a regular file.
 * @throws ProgramInvocationException Propagated from the {@code PackagedProgram} constructor.
 */
PackagedProgram buildProgram(ProgramOptions options) throws FileNotFoundException, ProgramInvocationException {
	// arguments passed through to the user program
	final String[] userArgs = options.getProgramArgs();
	// location of the job JAR
	final String jarPath = options.getJarFilePath();
	// additional classpath URLs
	final List<URL> extraClasspaths = options.getClasspaths();

	// the JAR path is mandatory
	if (jarPath == null) {
		throw new IllegalArgumentException("The program JAR file was not specified.");
	}

	final File jar = new File(jarPath);

	// the JAR must exist ...
	if (!jar.exists()) {
		throw new FileNotFoundException("JAR file does not exist: " + jar);
	}
	// ... and must be a regular file
	if (!jar.isFile()) {
		throw new FileNotFoundException("JAR file is not a file: " + jar);
	}

	// optional, explicitly specified entry point class
	final String entryPoint = options.getEntryPointClassName();

	// pick the constructor depending on whether an entry point class was given
	final PackagedProgram packaged;
	if (entryPoint != null) {
		packaged = new PackagedProgram(jar, extraClasspaths, entryPoint, userArgs);
	} else {
		packaged = new PackagedProgram(jar, extraClasspaths, userArgs);
	}

	// attach the savepoint restore settings
	packaged.setSavepointRestoreSettings(options.getSavepointRestoreSettings());

	return packaged;
}
这里我们继续重点分析 new PackagedProgram 这个实例化过程。这里根据是否指定了程序执行入口类,调用不同的构造函数来实例化PackagedProgram,其原理是一样的,所以我们只分析 new PackagedProgram(jarFile, classpaths, entryPointClass, programArgs) 这种情况。继续分析:
/**
 * Creates an instance that wraps the plan defined in the jar file using the given
 * arguments. For generating the plan the class defined in the className parameter
 * is used.
 *
 * @param jarFile
 *        The jar file which contains the plan.
 * @param classpaths
 *        Additional classpath URLs needed by the Program.
 * @param entryPointClassName
 *        Name of the class which generates the plan. Overrides the class defined
 *        in the jar file manifest
 * @param args
 *        Optional. The arguments used to create the pact plan, depend on
 *        implementation of the pact plan. See getDescription().
 * @throws ProgramInvocationException
 *         This invocation is thrown if the Program can't be properly loaded. Causes
 *         may be a missing / wrong class or manifest files.
 */
public PackagedProgram(File jarFile, List<URL> classpaths, @Nullable String entryPointClassName, String... args) throws ProgramInvocationException {
	// the program jar file is mandatory
	if (jarFile == null) {
		throw new IllegalArgumentException("The jar file must not be null.");
	}

	URL jarFileUrl;
	try {
		// URL of the absolute path of the program jar
		jarFileUrl = jarFile.getAbsoluteFile().toURI().toURL();
	} catch (MalformedURLException e1) {
		// keep the original exception as cause so the failure stays diagnosable
		throw new IllegalArgumentException("The jar file path is invalid.", e1);
	}

	// validate the program jar
	checkJarFile(jarFileUrl);

	this.jarFile = jarFileUrl;
	this.args = args == null ? new String[0] : args;

	// 1. if no entry point class name was given, look it up in the jar
	if (entryPointClassName == null) {
		entryPointClassName = getEntryPointClassNameFromJar(jarFileUrl);
	}

	// 2. extract all libraries that are nested inside the job jar
	this.extractedTempLibraries = extractContainedLibraries(jarFileUrl);
	this.classpaths = classpaths;

	// 3. build the class loader for the user code
	this.userCodeClassLoader = JobWithJars.buildUserCodeClassLoader(getAllLibraries(), classpaths, getClass().getClassLoader());

	// load the entry point class by its name
	this.mainClass = loadMainClass(entryPointClassName, userCodeClassLoader);

	// if the entry point is a program, instantiate the class and get the plan
	if (Program.class.isAssignableFrom(this.mainClass)) {
		Program prg = null;
		try {
			// instantiate the program
			prg = InstantiationUtil.instantiate(this.mainClass.asSubclass(Program.class), Program.class);
		} catch (Exception e) {
			// Instantiation failed. This is only an error when there is no main(String[])
			// method to fall back to; otherwise the exception is intentionally dropped
			// and 'prg' stays null.
			if (!hasMainMethod(mainClass)) {
				throw new ProgramInvocationException("The given program class implements the " +
						Program.class.getName() + " interface, but cannot be instantiated. " +
						"It also declares no main(String[]) method as alternative entry point", e);
			}
		} catch (Throwable t) {
			throw new ProgramInvocationException("Error while trying to instantiate program class.", t);
		}
		this.program = prg;
	} else if (hasMainMethod(mainClass)) {
		this.program = null;
	} else {
		throw new ProgramInvocationException("The given program class neither has a main(String[]) method, nor does it implement the " +
				Program.class.getName() + " interface.");
	}
}
上述代码中有几个关键步骤,下面逐一分析。
2.1. 如果没有指定入口类名称,那么就从jar包里找出来
当我们提交任务时,没有指定程序执行入口类名称,那么程序怎样从任务jar里找到程序执行入口类呢?代码如下:
/**
 * Determines the entry point class name from the manifest of the given jar file.
 * The assembler class attribute is consulted first, then the main class attribute.
 *
 * @param jarFile URL of the program jar.
 * @return The class name found in the manifest.
 * @throws ProgramInvocationException Thrown, if the jar cannot be opened or closed, the manifest
 *                                    is missing or unreadable, or neither attribute is present.
 */
private static String getEntryPointClassNameFromJar(URL jarFile) throws ProgramInvocationException {
	// open the jar file
	final JarFile archive;
	try {
		archive = new JarFile(new File(jarFile.toURI()));
	} catch (URISyntaxException use) {
		throw new ProgramInvocationException("Invalid file path '" + jarFile.getPath() + "'", use);
	} catch (IOException ioex) {
		throw new ProgramInvocationException("Error while opening jar file '" + jarFile.getPath() + "'. "
			+ ioex.getMessage(), ioex);
	}

	// from here on the jar must be closed again in any case
	try {
		// read the manifest of the jar
		final Manifest jarManifest;
		try {
			jarManifest = archive.getManifest();
		} catch (IOException ioex) {
			throw new ProgramInvocationException("The Manifest in the jar file could not be accessed '"
				+ jarFile.getPath() + "'. " + ioex.getMessage(), ioex);
		}

		if (jarManifest == null) {
			throw new ProgramInvocationException("No manifest found in jar file '" + jarFile.getPath() + "'. The manifest is need to point to the program's main class.");
		}

		final Attributes mainAttributes = jarManifest.getMainAttributes();

		// first choice: the assembler class attribute
		final String assemblerClass = mainAttributes.getValue(PackagedProgram.MANIFEST_ATTRIBUTE_ASSEMBLER_CLASS);
		if (assemblerClass != null) {
			return assemblerClass;
		}

		// second choice: the main class attribute
		final String mainClassName = mainAttributes.getValue(PackagedProgram.MANIFEST_ATTRIBUTE_MAIN_CLASS);
		if (mainClassName == null) {
			throw new ProgramInvocationException("Neither a '" + MANIFEST_ATTRIBUTE_MAIN_CLASS + "', nor a '" +
				MANIFEST_ATTRIBUTE_ASSEMBLER_CLASS + "' entry was found in the jar file.");
		}
		return mainClassName;
	}
	finally {
		try {
			archive.close();
		} catch (Throwable t) {
			throw new ProgramInvocationException("Could not close the JAR file: " + t.getMessage(), t);
		}
	}
}
通过读取任务jar包里MANIFEST.MF文件的属性,如果里面配置了入口类(先检查program-class属性,再检查Main-class属性),那就可以获取到入口类;否则会抛出异常。所以我们在提交任务的时候,为了保险起见,最好还是通过参数显式指定程序执行入口类。
2.2. 提取任务jar文件中所有依赖的JAR包
分析extractContainedLibraries(jarFileUrl)
这段代码逻辑:
/**
 * Takes all JAR files that are contained in this program's JAR file (entries named
 * {@code lib/*.jar}) and extracts them into temporary files.
 *
 * @param jarFile URL of the program JAR file.
 * @return The list of extracted temporary files; an empty list if the JAR contains no nested libraries.
 * @throws ProgramInvocationException Thrown, if the extraction process failed.
 */
public static List<File> extractContainedLibraries(URL jarFile) throws ProgramInvocationException {
	Random rnd = new Random();

	JarFile jar = null;
	try {
		jar = new JarFile(new File(jarFile.toURI()));
		final List<JarEntry> containedJarFileEntries = new ArrayList<JarEntry>();

		Enumeration<JarEntry> entries = jar.entries();
		while (entries.hasMoreElements()) {
			JarEntry entry = entries.nextElement();
			String name = entry.getName();

			// collect the jars below "lib/"; length > 8 excludes the bare name "lib/.jar"
			if (name.length() > 8 && name.startsWith("lib/") && name.endsWith(".jar")) {
				containedJarFileEntries.add(entry);
			}
		}

		if (containedJarFileEntries.isEmpty()) {
			return Collections.emptyList();
		}

		// go over all contained jar files
		final List<File> extractedTempLibraries = new ArrayList<File>(containedJarFileEntries.size());
		final byte[] buffer = new byte[4096];

		boolean incomplete = true;

		try {
			for (JarEntry entry : containedJarFileEntries) {
				// '/' as in case of zip, jar
				// java.util.zip.ZipEntry#isDirectory always looks only for '/' not for File.separator
				String name = entry.getName().replace('/', '_');

				File tempFile;
				try {
					tempFile = File.createTempFile(rnd.nextInt(Integer.MAX_VALUE) + "_", name);
					tempFile.deleteOnExit();
				}
				catch (IOException e) {
					throw new ProgramInvocationException(
						"An I/O error occurred while creating temporary file to extract nested library '" +
								entry.getName() + "'.", e);
				}

				// registered before copying, so that a failed copy is cleaned up below
				extractedTempLibraries.add(tempFile);

				// copy the entry contents to the temporary file; try-with-resources closes
				// both streams even if closing one of them fails
				try (OutputStream out = new FileOutputStream(tempFile);
						InputStream in = new BufferedInputStream(jar.getInputStream(entry))) {
					int numRead;
					while ((numRead = in.read(buffer)) != -1) {
						out.write(buffer, 0, numRead);
					}
				}
				catch (IOException e) {
					// keep the I/O failure as cause
					throw new ProgramInvocationException("An I/O error occurred while extracting nested library '"
							+ entry.getName() + "' to temporary file '" + tempFile.getAbsolutePath() + "'.", e);
				}
			}

			incomplete = false;
		}
		finally {
			// remove everything extracted so far if the extraction did not finish
			if (incomplete) {
				deleteExtractedLibraries(extractedTempLibraries);
			}
		}

		return extractedTempLibraries;
	}
	catch (Throwable t) {
		throw new ProgramInvocationException("Unknown I/O error while extracting contained jar files.", t);
	}
	finally {
		if (jar != null) {
			try {
				jar.close();
			} catch (Throwable ignored) {
				// best effort: a failure to close the jar must not hide the actual result
			}
		}
	}
}
2.3. 通过入口类名称来真正加载执行入口类
重点分析this.mainClass = loadMainClass(entryPointClassName, userCodeClassLoader);
/**
 * Loads the entry point class with the given class loader. While loading, the given class
 * loader is installed as the thread's context class loader; the previous context class
 * loader is put back afterwards.
 *
 * @param className Name of the entry point class.
 * @param cl Class loader used to load the class.
 * @return The loaded (not yet initialized) class.
 * @throws ProgramInvocationException Thrown, if the class cannot be found or loaded.
 */
private static Class<?> loadMainClass(String className, ClassLoader cl) throws ProgramInvocationException {
	final Thread currentThread = Thread.currentThread();
	ClassLoader contextCl = null;
	// Tracks whether the context class loader was actually replaced. A null check on the
	// previous loader is not sufficient, because null is a legal context class loader
	// and must be restored as well.
	boolean contextClassLoaderReplaced = false;
	try {
		contextCl = currentThread.getContextClassLoader();
		currentThread.setContextClassLoader(cl);
		contextClassLoaderReplaced = true;
		// load the class by name without initializing it (initialize = false)
		return Class.forName(className, false, cl);
	}
	catch (ClassNotFoundException e) {
		throw new ProgramInvocationException("The program's entry point class '" + className
			+ "' was not found in the jar file.", e);
	}
	catch (ExceptionInInitializerError e) {
		throw new ProgramInvocationException("The program's entry point class '" + className
			+ "' threw an error during initialization.", e);
	}
	catch (LinkageError e) {
		throw new ProgramInvocationException("The program's entry point class '" + className
			+ "' could not be loaded due to a linkage failure.", e);
	}
	catch (Throwable t) {
		throw new ProgramInvocationException("The program's entry point class '" + className
			+ "' caused an exception during initialization: " + t.getMessage(), t);
	} finally {
		if (contextClassLoaderReplaced) {
			currentThread.setContextClassLoader(contextCl);
		}
	}
}