源代码路径:org.pentaho.di.kitchen.Kitchen
Kitchen是kettle用来启动job的命令行工具,用户可以通过kitchen.sh脚本执行Job任务。下面我们来看Kitchen是如何运行一个Job的。
打开Kitchen源码,进入main()方法,首先可以看到它获取了一个ExecutorService,该executor主要用于初始化kettle环境。
// Obtain the executor; both initialization tasks below are submitted to it.
final ExecutorService executor = ExecutorUtil.getExecutor();
final RepositoryPluginType repositoryPluginType = RepositoryPluginType.getInstance();
// Two nested submits initialize the environment.
// The outer task registers the repository plugin type and runs KettleClientEnvironment.init();
// on success it submits the inner task, which runs KettleEnvironment.init().
// The outer future yields an entry whose key is the KettlePluginException caught during client
// init (null on success) and whose value is the inner task's future (null when client init failed).
final Future<Map.Entry<KettlePluginException, Future<KettleException>>> repositoryRegisterFuture =
executor.submit( new Callable<Map.Entry<KettlePluginException, Future<KettleException>>>() {
@Override
public Map.Entry<KettlePluginException, Future<KettleException>> call() throws Exception {
PluginRegistry.addPluginType( repositoryPluginType );
try {
// Per the article: mainly creates the .kettle directory and the kettle.properties file under it,
// and also loads some basic plugin types.
KettleClientEnvironment.init();
} catch ( KettlePluginException e ) {
// Client init failed: report the exception as the key and skip the second stage.
return new AbstractMap.SimpleImmutableEntry<KettlePluginException, Future<KettleException>>( e, null );
}
// Second stage: runs as a separate task on the same executor.
Future<KettleException> kettleEnvironmentInitFuture =
executor.submit( new Callable<KettleException>() {
@Override
public KettleException call() throws Exception {
try {
// Mark this process as the KITCHEN client before the full environment init.
KettleClientEnvironment.getInstance().setClient( KettleClientEnvironment.ClientType.KITCHEN );
// Per the article: loads the extension plugin types and initializes the values of Kettle's basic variables.
KettleEnvironment.init();
} catch ( KettleException e ) {
// The failure is returned as the task result rather than thrown.
return e;
}
// null signals that KettleEnvironment.init() completed without a KettleException.
return null;
}
} );
// Client init succeeded: no exception as key, the second-stage future as value.
return new AbstractMap.SimpleImmutableEntry<KettlePluginException, Future<KettleException>>
( null, kettleEnvironmentInitFuture );
}
} );
其中,KettleClientEnvironment的init()方法的主要作用是:如果用户目录下没有.kettle目录,或者该目录下没有kettle.properties文件,则进行创建,同时把kettle.properties中的配置属性加载到系统中。KettleEnvironment的init()方法主要加载并初始化engine包下kettle-variables.xml中配置的属性。
其二构建用户元信息存储目录。
// Create a delegating metastore and register the local Pentaho metastore with it.
DelegatingMetaStore metaStore = new DelegatingMetaStore();
metaStore.addMetaStore( MetaStoreConst.openLocalPentahoMetaStore() );
// Use the name reported by metaStore.getName() as the active metastore name.
metaStore.setActiveMetaStoreName( metaStore.getName() );
以上三行代码的作用是把用户目录下的.pentaho目录加载为元信息存储目录。
接下来,第三步是解析用户输入的命令行参数。用户可输入的所有参数如下:
// The two log-limit options are declared as named locals and appended at the end of the
// options array below.
CommandLineOption maxLogLinesOption =
new CommandLineOption(
"maxloglines", BaseMessages.getString( PKG, "Kitchen.CmdLine.MaxLogLines" ), new StringBuffer() );
CommandLineOption maxLogTimeoutOption =
new CommandLineOption(
"maxlogtimeout", BaseMessages.getString( PKG, "Kitchen.CmdLine.MaxLogTimeout" ), new StringBuffer() );
// Table of all command-line options accepted by Kitchen. Each entry pairs an option name with a
// description looked up through BaseMessages and a target buffer; the buffer is also assigned to
// an option* variable declared outside this excerpt, so the value can be read after parsing.
// NOTE(review): the two trailing booleans on some entries are presumably "yes/no flag" and
// "hidden option" -- confirm against the CommandLineOption constructor.
// NOTE(review): the message keys use both "Kitchen.CmdLine." and "Kitchen.ComdLine." prefixes;
// they are runtime lookup keys and are left exactly as written.
CommandLineOption[] options =
new CommandLineOption[]{
// Repository selection and credentials.
new CommandLineOption( "rep", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepName" ), optionRepname =
new StringBuffer() ),
new CommandLineOption(
"user", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepUsername" ), optionUsername =
new StringBuffer() ),
new CommandLineOption(
"pass", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepPassword" ), optionPassword =
new StringBuffer() ),
// Job location: a job name plus directory, or a file.
new CommandLineOption(
"job", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepJobName" ), optionJobname =
new StringBuffer() ),
new CommandLineOption( "dir", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepDir" ), optionDirname =
new StringBuffer() ),
new CommandLineOption(
"file", BaseMessages.getString( PKG, "Kitchen.CmdLine.XMLJob" ), optionFilename =
new StringBuffer() ),
// Logging options.
new CommandLineOption(
"level", BaseMessages.getString( PKG, "Kitchen.CmdLine.LogLevel" ), optionLoglevel =
new StringBuffer() ),
new CommandLineOption(
"logfile", BaseMessages.getString( PKG, "Kitchen.CmdLine.LogFile" ), optionLogfile =
new StringBuffer() ),
new CommandLineOption(
"log", BaseMessages.getString( PKG, "Kitchen.CmdLine.LogFileOld" ), optionLogfileOld =
new StringBuffer(), false, true ),
// Listing and informational options.
new CommandLineOption(
"listdir", BaseMessages.getString( PKG, "Kitchen.CmdLine.ListDir" ), optionListdir =
new StringBuffer(), true, false ),
new CommandLineOption(
"listjobs", BaseMessages.getString( PKG, "Kitchen.CmdLine.ListJobsDir" ), optionListjobs =
new StringBuffer(), true, false ),
new CommandLineOption(
"listrep", BaseMessages.getString( PKG, "Kitchen.CmdLine.ListAvailableReps" ), optionListrep =
new StringBuffer(), true, false ),
new CommandLineOption( "norep", BaseMessages.getString( PKG, "Kitchen.CmdLine.NoRep" ), optionNorep =
new StringBuffer(), true, false ),
new CommandLineOption(
"version", BaseMessages.getString( PKG, "Kitchen.CmdLine.Version" ), optionVersion =
new StringBuffer(), true, false ),
// "param" and "custom" take an existing object (optionParams / customOptions) instead of a
// fresh StringBuffer; both objects are declared outside this excerpt.
new CommandLineOption(
"param", BaseMessages.getString( PKG, "Kitchen.ComdLine.Param" ), optionParams, false ),
new CommandLineOption(
"listparam", BaseMessages.getString( PKG, "Kitchen.ComdLine.ListParam" ), optionListParam =
new StringBuffer(), true, false ),
new CommandLineOption(
"export", BaseMessages.getString( PKG, "Kitchen.ComdLine.Export" ), optionExport =
new StringBuffer(), true, false ),
new CommandLineOption(
"custom", BaseMessages.getString( PKG, "Kitchen.ComdLine.Custom" ), customOptions, false ),
maxLogLinesOption, maxLogTimeoutOption, };
每个参数的作用都有介绍,这里就不多说了。
第四是构建repository。
// Per the article: this instance uses the contents of repositories.xml under the .kettle folder
// as the metadata for all repositories.
RepositoriesMeta repsinfo = new RepositoriesMeta();
repsinfo.getLog().setLogLevel( log.getLogLevel() );
try {
// Load and parse the contents of repositories.xml.
repsinfo.readData();
} catch ( Exception e ) {
// Any failure while reading is rethrown as a KettleException with the original cause attached.
throw new KettleException( BaseMessages.getString( PKG, "Kitchen.Error.NoRepDefinied" ), e );
}
if ( log.isDebug() ) {
log.logDebug( BaseMessages.getString( PKG, "Kitchen.Log.FindingRep", "" + optionRepname ) );
}
// Look up the repository metadata by the value of the -rep option.
repositoryMeta = repsinfo.findRepository( optionRepname.toString() );
// NOTE: the if-block opened on the next line is closed outside this excerpt.
if ( repositoryMeta != null ) {
// Define and connect to the repository...
if ( log.isDebug() ) {
log.logDebug( BaseMessages.getString( PKG, "Kitchen.Log.Alocate&ConnectRep" ) );
}
// Instantiate the repository through the plugin registry (via reflection, per the article).
repository =
PluginRegistry.getInstance().loadClass(
RepositoryPluginType.class, repositoryMeta, Repository.class );
repository.init( repositoryMeta );
repository.getLog().setLogLevel( log.getLogLevel() );
// Connect with the -user / -pass values; null is passed when an option buffer is null.
repository.connect( optionUsername != null ? optionUsername.toString() : null, optionPassword != null
? optionPassword.toString() : null );
// Check the EXECUTE_JOB operation with the repository's security provider before going further.
repository.getSecurityProvider().validateAction( RepositoryOperation.EXECUTE_JOB );
RepositoryDirectoryInterface directory = repository.loadRepositoryDirectoryTree(); // Default = root
// Add the IMetaStore of the repository to our delegation
//
if ( repository.getMetaStore() != null ) {
metaStore.addMetaStore( repository.getMetaStore() );
}
接下来是加载具体的Job。具体做法是先根据路径构造jobMeta元信息,然后再根据repository和jobMeta创建Job。需要注意的是,一个Job的xml文件里包含了该job需要执行的每个entry,所以jobMeta也具有解析并实例化每一个entry的能力,具体实现可以看jobMeta的loadXML()方法。
// Load a job
// Only taken when a job name was supplied on the command line.
if ( !Const.isEmpty( optionJobname ) ) {
if ( log.isDebug() ) {
log.logDebug( BaseMessages.getString( PKG, "Kitchen.Log.LoadingJobInfo" ) );
}
// NOTE(review): blockAndThrow and kettleInitFuture are defined outside this excerpt; presumably
// this waits for the asynchronous Kettle environment init and rethrows its failure -- confirm.
blockAndThrow( kettleInitFuture );
jobMeta = repository.loadJob( optionJobname.toString(), directory, null, null ); // reads last version
if ( log.isDebug() ) {
log.logDebug( BaseMessages.getString( PKG, "Kitchen.Log.AllocateJob" ) );
}
// Build the runnable Job from the repository and the loaded metadata.
job = new Job( repository, jobMeta );
}
最后就是执行Job了。具体代码如下:
// Holds the execution result of the job.
Result result = null;
int returnCode = 0;
// NOTE: the catch/finally belonging to this try lies outside the excerpt.
try {
// Set the command line arguments on the job ...
if ( args.size() == 0 ) {
job.setArguments( null );
} else {
job.setArguments( args.toArray( new String[args.size()] ) );
}
// Initialize the concrete Job instance.
job.initializeVariablesFrom( null );
job.setLogLevel( log.getLogLevel() );
job.getJobMeta().setInternalKettleVariables( job );
// Attach the repository and metastore to both the job and its metadata.
job.setRepository( repository );
job.getJobMeta().setRepository( repository );
job.getJobMeta().setMetaStore( metaStore );
// Map the command line named parameters to the actual named parameters. Skip for
// the moment any extra command line parameter not known in the job.
String[] jobParams = jobMeta.listParameters();
for ( String param : jobParams ) {
String value = optionParams.getParameterValue( param );
// Only parameters that were given a value on the command line are set on the metadata.
if ( value != null ) {
job.getJobMeta().setParameterValue( param, value );
}
}
job.copyParametersFrom( job.getJobMeta() );
// Put the parameters over the already defined variable space. Parameters get priority.
//
job.activateParameters();
// Set custom options in the job extension map as Strings
//
for ( String optionName : customOptions.listParameters() ) {
String optionValue = customOptions.getParameterValue( optionName );
// Entries with a null name or null value are skipped.
if ( optionName != null && optionValue != null ) {
job.getExtensionDataMap().put( optionName, optionValue );
}
}
// List the parameters defined in this job
// Then simply exit...
//
if ( "Y".equalsIgnoreCase( optionListParam.toString() ) ) {
for ( String parameterName : job.listParameters() ) {
String value = job.getParameterValue( parameterName );
String deflt = job.getParameterDefault( parameterName );
String descr = job.getParameterDescription( parameterName );
// The default is printed only when one is defined; null value/description print as "".
if ( deflt != null ) {
System.out.println( "Parameter: "
+ parameterName + "=" + Const.NVL( value, "" ) + ", default=" + deflt + " : "
+ Const.NVL( descr, "" ) );
} else {
System.out.println( "Parameter: "
+ parameterName + "=" + Const.NVL( value, "" ) + " : " + Const.NVL( descr, "" ) );
}
}
// stop right here...
//
exitJVM( 7 ); // same as the other list options
}
// Start the job, wait until it has finished, then collect its result.
job.start();
job.waitUntilFinished();
result = job.getResult(); // Execute the selected job.
}