Gobblin CLI提供了在终端运行jar程序的功能,该功能通过bin目录下的gobblin脚本实现。gobblin命令会自动寻找job的类路径;如果设置了变量HADOOP_HOME,gobblin还会找到hadoop的类路径。执行bin/gobblin -h可以列出所有可用的参数。
bin/gobblin命令的使用方法如下:bin/gobblin run [listQuickApps] [<quick-app>] -jobName <jobName> [OPTIONS]
listQuickApps:列出所有可用的应用列表
<quick-app>:应用名称
bin/gobblin run <quick-app-name> -h:列出应用的参数列表
开发自定义命令行应用
1.自定义类继承EmbeddedGobblin类
2.定义内部类CliFactory继承PublicMethodsGobblinCliFactory,复写方法constructEmbeddedGobblin(CommandLine cli),复写方法getUsageString()用来提示使用方法,
复写setTemplate(String templateURI)用来设置模板文件。
3.自定义类注解@Alias,gobblin会自动扫描到该Gobblin CLI类。
实例:EmbeddedGobblinDistcp 用于分布式环境拷贝数据
/**
 * Embedded version of distcp.
 *
 * <p>Usage:
 * <pre>
 *   new EmbeddedGobblinDistcp(new Path("/source"), new Path("/dest")).run();
 * </pre>
 *
 * <p>The option methods ({@link #update()}, {@link #delete()}, ...) each set one configuration key
 * and return {@code this}, so they can be chained before running the job.
 */
public class EmbeddedGobblinDistcp extends EmbeddedGobblin {

  /**
   * Creates a distcp job from the {@code templates/distcp.template} resource.
   *
   * <p>Besides recording the two paths under the {@code "from"} and {@code "to"} keys, the constructor
   * resolves each path's file system and stores its URI as the source / writer file system URI, and
   * registers the jar containing {@link CopySource} for distribution.
   *
   * @param from path to copy from
   * @param to path to copy to
   * @throws JobTemplate.TemplateException propagated from setting the job template
   * @throws IOException propagated from template loading or file system resolution
   * @throws RuntimeException if the template resource cannot be located or its URI is malformed
   */
  public EmbeddedGobblinDistcp(Path from, Path to) throws JobTemplate.TemplateException, IOException {
    super("Distcp");

    try {
      setTemplate(ResourceBasedJobTemplate.forResourcePath("templates/distcp.template"));
    } catch (URISyntaxException | SpecNotFoundException cause) {
      final String message = "Could not instantiate an " + EmbeddedGobblinDistcp.class.getName();
      throw new RuntimeException(message, cause);
    }

    this.setConfiguration("from", from.toString());
    this.setConfiguration("to", to.toString());

    // Infer source and target fs uris from the input paths
    final String sourceFsUri = from.getFileSystem(new Configuration()).getUri().toString();
    this.setConfiguration(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, sourceFsUri);
    final String targetFsUri = to.getFileSystem(new Configuration()).getUri().toString();
    this.setConfiguration(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, targetFsUri);

    // add gobblin-data-management jar to distributed jars
    final String dataManagementJar = ClassUtil.findContainingJar(CopySource.class);
    this.distributeJar(dataManagementJar);
  }

  /**
   * CLI factory registered under the alias {@code distcp}; builds an {@link EmbeddedGobblinDistcp}
   * from the two positional command line arguments.
   */
  @Alias(value = "distcp", description = "Distributed copy between Hadoop compatibly file systems.")
  public static class CliFactory extends PublicMethodsGobblinCliFactory {

    public CliFactory() {
      super(EmbeddedGobblinDistcp.class);
    }

    /**
     * Builds the job from the arguments left over after option parsing.
     *
     * @param cli parsed command line; must carry exactly two leftover arguments (source, target)
     * @return a new {@link EmbeddedGobblinDistcp} for the given source and target
     * @throws RuntimeException if the number of leftover arguments is not two
     */
    @Override
    public EmbeddedGobblin constructEmbeddedGobblin(CommandLine cli) throws JobTemplate.TemplateException, IOException {
      final String[] positional = cli.getArgs();
      if (positional.length == 2) {
        final Path source = new Path(positional[0]);
        final Path target = new Path(positional[1]);
        return new EmbeddedGobblinDistcp(source, target);
      }
      throw new RuntimeException("Unexpected number of arguments.");
    }

    /** Returns the usage synopsis shown for this application. */
    @Override
    public String getUsageString() {
      return "[OPTIONS] <source> <target>";
    }
  }

  /**
   * Specifies that files in the target should be updated if they have changed in the source.
   * Equivalent to -update option in Hadoop distcp.
   *
   * @return this instance, for chaining
   */
  @EmbeddedGobblinCliOption(description = "Specifies files should be updated if they're different in the source.")
  public EmbeddedGobblinDistcp update() {
    this.setConfiguration(RecursiveCopyableDataset.UPDATE_KEY, Boolean.TRUE.toString());
    return this;
  }

  /**
   * Specifies that files in the target that don't exist in the source should be deleted.
   * Equivalent to -delete option in Hadoop distcp.
   *
   * @return this instance, for chaining
   */
  @EmbeddedGobblinCliOption(description = "Delete files in target that don't exist on source.")
  public EmbeddedGobblinDistcp delete() {
    this.setConfiguration(RecursiveCopyableDataset.DELETE_KEY, Boolean.TRUE.toString());
    return this;
  }

  /**
   * If {@link #delete()} is used, specifies that newly empty parent directories should also be deleted.
   *
   * @return this instance, for chaining
   */
  @EmbeddedGobblinCliOption(description = "If deleting files on target, also delete newly empty parent directories.")
  public EmbeddedGobblinDistcp deleteEmptyParentDirectories() {
    this.setConfiguration(RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY, Boolean.TRUE.toString());
    return this;
  }

  /**
   * Run in simulate mode. Will log everything it would copy, but not actually copy anything.
   *
   * @return this instance, for chaining
   */
  public EmbeddedGobblinDistcp simulate() {
    this.setConfiguration(CopySource.SIMULATE, Boolean.TRUE.toString());
    return this;
  }

  /**
   * Delegates unchanged to the parent implementation; overridden only so the method can carry
   * {@code @NotOnCli} and thereby remove the template setter from the CLI.
   */
  @Override
  @NotOnCli
  public EmbeddedGobblin setTemplate(String templateURI)
      throws URISyntaxException, SpecNotFoundException, JobTemplate.TemplateException {
    return super.setTemplate(templateURI);
  }
}