shell脚本分析
实例命令:hdfs dfs -ls
对应脚本文件:bin/hdfs
对应具体脚本内容:
COMMAND=$1
shift
elif [ "$COMMAND" = "dfs" ] ; then
CLASS=org.apache.hadoop.fs.FsShell
最终执行的命令:
exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
//其中,CLASS=org.apache.hadoop.fs.FsShell,$@=-ls
FsShell实现分析
当然是先看入口main
/**
 * Command-line entry point: creates a shell, gives it a fresh configuration,
 * runs it through ToolRunner and exits the JVM with the resulting status.
 *
 * @param argv raw command-line arguments (for example {@code -ls})
 * @throws Exception anything propagated by ToolRunner.run or shell.close
 */
public static void main(String argv[]) throws Exception {
  // Factory hook; per the original note this amounts to new FsShell().
  final FsShell fsShell = newShellInstance();
  // Per the original note, constructing a Configuration loads
  // core-default.xml and core-site.xml.
  final Configuration configuration = new Configuration();
  // Quiet mode off; per the original note, quiet mode would suppress
  // error and informational messages.
  configuration.setQuietMode(false);
  fsShell.setConf(configuration);

  int exitStatus;
  try {
    // ToolRunner executes classes implementing Tool; FsShell is one of them.
    exitStatus = ToolRunner.run(fsShell, argv);
  } finally {
    // Always release the shell, even when run() throws.
    fsShell.close();
  }
  System.exit(exitStatus);
}
ToolRunner类结合GenericOptionsParser类来解析命令行参数,
ToolRunner.run(shell, argv)先对命令行参数进行一番解析,最后真正执行的仍然是类FsShell的run方法(传入的是解析之后的参数),run方法如下:
/**
 * Dispatches a shell invocation: the first argument selects the command,
 * the remaining arguments are handed to that command.
 *
 * @param argv arguments left after generic option parsing, e.g. {@code -ls /}
 * @return the command's exit code, or -1 when no command was given or
 *         the command could not be run
 * @throws Exception if initialisation fails
 */
@Override
public int run(String argv[]) throws Exception {
  // Initialise the shell, which includes registering the command classes.
  init();

  if (argv.length < 1) {
    // No command given: show the usage text and report failure.
    printUsage(System.err);
    return -1;
  }

  int exitCode = -1;
  final String cmd = argv[0]; // first argument, i.e. "-ls"
  Command instance = null;
  try {
    // Look up the Command for this name (for -ls this is an Ls instance).
    instance = commandFactory.getInstance(cmd);
    if (instance == null) {
      throw new UnknownCommandException();
    }
    // Runs Command.run (the parent of Ls) with the command name stripped.
    final String[] commandArgs = Arrays.copyOfRange(argv, 1, argv.length);
    exitCode = instance.run(commandArgs);
  } catch (IllegalArgumentException e) {
    displayError(cmd, e.getLocalizedMessage());
    if (instance != null) {
      printInstanceUsage(System.err, instance);
    }
  } catch (Exception e) {
    // instance.run catches IOE, so something is REALLY wrong if here
    LOG.debug("Error", e);
    displayError(cmd, "Fatal internal error");
    e.printStackTrace(System.err);
  }
  return exitCode;
}
(1)重点分析一:init()如下:
/**
 * Switches the configuration to quiet mode and, on first use, builds the
 * command factory and fills it with the available commands.
 *
 * @throws IOException declared by the signature; not thrown by the visible code
 */
protected void init() throws IOException {
  getConf().setQuietMode(true); // turn quiet mode on

  if (commandFactory != null) {
    return; // factory already built by an earlier call
  }

  commandFactory = new CommandFactory(getConf());
  // help/usage are registered as ready-made objects (per the original note
  // they land in the factory's objectMap).
  commandFactory.addObject(new Help(), "-help");
  commandFactory.addObject(new Usage(), "-usage");
  // Every other command is registered by class only; per the original note
  // they are instantiated lazily via reflection and kept in the classMap.
  registerCommands(commandFactory);
}
/**
 * Registers the file system commands with the factory, but only when this
 * object is exactly an FsShell (subclasses are skipped).
 *
 * @param factory the factory that receives the command registrations
 */
protected void registerCommands(CommandFactory factory) {
  // TODO: DFSAdmin subclasses FsShell so need to protect the command
  // registration.  This class should morph into a base class for
  // commands, and then this method can be abstract
  final boolean isPlainFsShell = this.getClass().equals(FsShell.class);
  if (!isPlainFsShell) {
    return;
  }
  // Delegates to CommandFactory.registerCommands; note that the argument
  // is the FsCommand class itself.
  factory.registerCommands(FsCommand.class);
}
CommandFactory类的registerCommands方法如下:
/**
 * Reflectively invokes the static {@code registerCommands(CommandFactory)}
 * method of the given class, passing this factory as the argument.
 *
 * @param registrarClass class expected to expose that static method
 * @throws RuntimeException wrapping the stringified cause when the lookup
 *         or the invocation fails
 */
public void registerCommands(Class<?> registrarClass) {
  try {
    // When called with FsCommand.class this triggers
    // FsCommand.registerCommands(factory).
    final java.lang.reflect.Method registrar =
        registrarClass.getMethod("registerCommands", CommandFactory.class);
    registrar.invoke(null, this); // static method, hence the null receiver
  } catch (Exception e) {
    throw new RuntimeException(StringUtils.stringifyException(e));
  }
}
接下来,看看类FsCommand的registerCommands方法(即上面通过反射触发的方法),代码如下:
/**
 * Registers every group of file system shell commands with the factory.
 * Each listed class is passed to {@code factory.registerCommands}, in the
 * order given below.
 *
 * @param factory the factory that receives the registrations
 */
public static void registerCommands(CommandFactory factory) {
  // Registration order is kept exactly as listed.
  final Class<?>[] registrars = {
    AclCommands.class,
    CopyCommands.class,
    Count.class,
    Delete.class,
    Display.class,
    Find.class,
    FsShellPermissions.class,
    FsUsage.class,
    Ls.class,
    Mkdir.class,
    MoveCommands.class,
    SetReplication.class,
    Stat.class,
    Tail.class,
    Test.class,
    Touch.class,
    Truncate.class,
    SnapshotCommands.class,
    XAttrCommands.class,
  };
  for (Class<?> registrar : registrars) {
    factory.registerCommands(registrar);
  }
}
我们再来看看Ls类
class Ls extends FsCommand {
/**
 * Registers the listing commands with the factory: {@code -ls} maps to Ls
 * and {@code -lsr} maps to Lsr. Only the classes are registered here.
 *
 * @param factory the factory that receives the registrations
 */
public static void registerCommands(CommandFactory factory) {
  factory.addClass(Ls.class, "-ls");
  factory.addClass(Lsr.class, "-lsr");
}
(2)重点分析二:instance.run
调用父类Command的run方法:
* run
* |-> {@link #processOptions(LinkedList)}
* \-> {@link #processRawArguments(LinkedList)}
* |-> {@link #expandArguments(LinkedList)}
* | \-> {@link #expandArgument(String)}*
* \-> {@link #processArguments(LinkedList)}
* |-> {@link #processArgument(PathData)}*
* | |-> {@link #processPathArgument(PathData)}
* | \-> {@link #processPaths(PathData, PathData...)}
* | \-> {@link #processPath(PathData)}*
* \-> {@link #processNonexistentPath(PathData)}
/**
 * Runs the command: processes its options first and then its remaining
 * arguments. An IOException from either step is reported via displayError.
 *
 * @param argv the command's arguments, without the command name
 * @return exitCode when numErrors is zero, otherwise exitCodeForError()
 */
public int run(String...argv) {
  // Copy the varargs into a mutable list that the processing steps receive.
  final LinkedList<String> remaining =
      new LinkedList<String>(Arrays.asList(argv));
  try {
    if (isDeprecated()) {
      displayWarning(
          "DEPRECATED: Please use '"+ getReplacementCommand() + "' instead.");
    }
    processOptions(remaining);       // subclass hook, e.g. Ls.processOptions
    processRawArguments(remaining);  // expand and process the path arguments
  } catch (IOException e) {
    displayError(e);
  }
  if (numErrors == 0) {
    return exitCode;
  }
  return exitCodeForError();
}
processOptions(args);是调用子类Ls的processOptions(args);此方法主要做了两件事,一是处理配置选项,如-d(对应dirRecurse,加上此参数就不递归目录了),-R(recurse,文件递归,加上此参数输出所有层级),-h(humanReadable),二是处理参数(没有给出路径参数时默认使用当前目录),如下:
/**
 * Parses the -d, -h and -R flags of ls and stores them in the command's
 * fields. When no path argument is present, the current directory is added.
 *
 * @param args the command arguments; modified by parsing and possibly
 *             extended with Path.CUR_DIR
 * @throws IOException declared by the signature
 */
@Override
protected void processOptions(LinkedList<String> args)
    throws IOException {
  final CommandFormat format =
      new CommandFormat(0, Integer.MAX_VALUE, "d", "h", "R");
  format.parse(args);

  // -d turns directory descent off.
  dirRecurse = !format.getOpt("d");
  // -R only takes effect while directory descent is still enabled.
  setRecursive(format.getOpt("R") && dirRecurse);
  humanReadable = format.getOpt("h");

  if (args.isEmpty()) {
    args.add(Path.CUR_DIR);
  }
}
processRawArguments(args);是调用父类Command的processRawArguments(args);
/**
 * Expands the raw string arguments into PathData items and then processes
 * the expanded items.
 *
 * @param args the raw string arguments
 * @throws IOException if expansion or processing fails
 */
protected void processRawArguments(LinkedList<String> args)
    throws IOException {
  final LinkedList<PathData> expanded = expandArguments(args);
  processArguments(expanded);
}
*************分两部分:首先分析expandArguments(args)********************
/**
 * Converts the string arguments into PathData items. An argument whose
 * expansion throws an IOException is reported via displayError and skipped;
 * the remaining arguments are still expanded.
 *
 * @param args the string arguments
 * @return the expanded items, in argument order
 * @throws IOException declared by the signature
 */
protected LinkedList<PathData> expandArguments(LinkedList<String> args)
    throws IOException {
  final LinkedList<PathData> collected = new LinkedList<PathData>();
  for (String rawArg : args) {
    try {
      collected.addAll(expandArgument(rawArg));
    } catch (IOException e) { // other exceptions are probably nasty
      displayError(e);
    }
  }
  return collected;
}
/**
 * Expands one argument into a list of PathData. A single argument can yield
 * several items because it may contain wildcards (which is what "glob"
 * refers to).
 *
 * @param arg the string argument, possibly a glob pattern
 * @return the expanded items; never empty
 * @throws PathNotFoundException if the expansion produced no items
 * @throws IOException if the expansion itself fails
 */
protected List<PathData> expandArgument(String arg) throws IOException {
  final PathData[] matches = PathData.expandAsGlob(arg, getConf());
  if (matches.length != 0) {
    return Arrays.asList(matches);
  }
  // it's a glob that failed to match
  throw new PathNotFoundException(arg);
}
/**
 * Expands a (possibly glob) pattern string into PathData items. The flow is:
 * 1) convert the string into a Path
 * 2) obtain the FileSystem for that Path
 * 3) ask the FileSystem for the FileStatus entries matching the Path
 * 4) wrap the FileStatus entries as PathData[]
 * Per the original note, FileStatus carries the metadata of files and
 * directories: length, block size, replication, modification time, access
 * time, owner and permission information.
 *
 * @param pattern the path or glob pattern given on the command line
 * @param conf the configuration used to resolve the FileSystem
 * @return the matching items, sorted; a single item with a null stat when
 *         globStatus returned null
 * @throws IOException if resolving the FileSystem or globbing fails
 **/
public static PathData[] expandAsGlob(String pattern, Configuration conf)
throws IOException {
Path globPath = new Path(pattern);
// Resolve the FileSystem instance from the path URI's scheme together with
// conf (analysed in appendix 1).
FileSystem fs = globPath.getFileSystem(conf);
// Fetch the FileStatus entries matching the glob path (analysed in appendix 2).
FileStatus[] stats = fs.globStatus(globPath);
PathData[] items = null;
// Nothing in the file system matched the path, so build a PathData with a
// null stat. Note that no exception is thrown for a non-existent path here;
// per the original note this is because commands such as touch and mkdir
// need to create the path.
if (stats == null) {
// remove any quoting in the glob pattern
pattern = pattern.replaceAll("\\\\(.)", "$1");
// not a glob & file not found, so add the path with a null stat
items = new PathData[]{ new PathData(fs, pattern, null) };
} else {
// figure out what type of glob path was given, will convert globbed
// paths to match the type to preserve relativity
PathType globType;
URI globUri = globPath.toUri();
if (globUri.getScheme() != null) {
globType = PathType.HAS_SCHEME;
} else if (!globUri.getPath().isEmpty() &&
new Path(globUri.getPath()).isAbsolute()) {
globType = PathType.SCHEMELESS_ABSOLUTE;
} else {
globType = PathType.RELATIVE;
}
// convert stats to PathData
// Wrap each FileStatus as a PathData; the array is sorted after the loop.
items = new PathData[stats.length];
int i=0;
for (FileStatus stat : stats) {
URI matchUri = stat.getPath().toUri();
String globMatch = null;
switch (globType) {
case HAS_SCHEME: // use as-is, but remove authority if necessary
if (globUri.getAuthority() == null) {
matchUri = removeAuthority(matchUri);
}
globMatch = uriToString(matchUri, false);
break;
case SCHEMELESS_ABSOLUTE: // take just the uri's path
globMatch = matchUri.getPath();
break;
case RELATIVE: // make it relative to the current working dir
URI cwdUri = fs.getWorkingDirectory().toUri();
globMatch = relativize(cwdUri, matchUri, stat.isDirectory());
break;
}
items[i++] = new PathData(fs, globMatch, stat);
}
}
Arrays.sort(items); // sort by path in ascending order, see PathData.compareTo
return items;
}
接着重点看看fs.globStatus(globPath);,这是fs真正获取fileStatus的过程,如下:
父类FileSystem.globStatus
/**
 * Returns the FileStatus entries matching the given path pattern, using the
 * default filter. The matching itself is done by Globber.glob().
 *
 * @param pathPattern the path, possibly containing glob syntax
 * @return whatever Globber.glob() returns for this pattern
 * @throws IOException if globbing fails
 */
public FileStatus[] globStatus(Path pathPattern) throws IOException {
  final Globber globber = new Globber(this, pathPattern, DEFAULT_FILTER);
  return globber.glob();
}
真正的获取FileStatus在glob()中:
/**
 * Expands the path pattern held by this Globber into the FileStatus entries
 * that match it. Grouping clauses are flattened into separate patterns, each
 * flattened pattern is matched component by component starting at the root,
 * and the user-defined filter is applied to the surviving candidates.
 *
 * @return the matching entries (possibly an empty array), or null when no
 *         wildcard was seen, there was at most one flattened pattern and
 *         nothing matched
 * @throws IOException if querying the file system fails
 */
public FileStatus[] glob() throws IOException {
// First we get the scheme and authority of the pattern that was passed
// in.
String scheme = schemeFromPath(pathPattern);
String authority = authorityFromPath(pathPattern);
// Next we strip off everything except the pathname itself, and expand all
// globs. Expansion is a process which turns "grouping" clauses,
// expressed as brackets, into separate path patterns.
String pathPatternString = pathPattern.toUri().getPath();
List<String> flattenedPatterns = GlobExpander.expand(pathPatternString);
// Now loop over all flattened patterns. In every case, we'll be trying to
// match them to entries in the filesystem.
ArrayList<FileStatus> results =
new ArrayList<FileStatus>(flattenedPatterns.size());
boolean sawWildcard = false;
for (String flatPattern : flattenedPatterns) {
// Get the absolute path for this flattened pattern. We couldn't do
// this prior to flattening because of patterns like {/,a}, where which
// path you go down influences how the path must be made absolute.
Path absPattern = fixRelativePart(new Path(
flatPattern.isEmpty() ? Path.CUR_DIR : flatPattern));
// Now we break the flattened, absolute pattern into path components.
// For example, /a/*/c would be broken into the list [a, *, c]
List<String> components =
getPathComponents(absPattern.toUri().getPath());
// Starting out at the root of the filesystem, we try to match
// filesystem entries against pattern components.
ArrayList<FileStatus> candidates = new ArrayList<FileStatus>(1);
// To get the "real" FileStatus of root, we'd have to do an expensive
// RPC to the NameNode. So we create a placeholder FileStatus which has
// the correct path, but defaults for the rest of the information.
// Later, if it turns out we actually want the FileStatus of root, we'll
// replace the placeholder with a real FileStatus obtained from the
// NameNode.
FileStatus rootPlaceholder;
if (Path.WINDOWS && !components.isEmpty()
&& Path.isWindowsAbsolutePath(absPattern.toUri().getPath(), true)) {
// On Windows the path could begin with a drive letter, e.g. /E:/foo.
// We will skip matching the drive letter and start from listing the
// root of the filesystem on that drive.
String driveLetter = components.remove(0);
rootPlaceholder = new FileStatus(0, true, 0, 0, 0, new Path(scheme,
authority, Path.SEPARATOR + driveLetter + Path.SEPARATOR));
} else {
rootPlaceholder = new FileStatus(0, true, 0, 0, 0,
new Path(scheme, authority, Path.SEPARATOR));
}
candidates.add(rootPlaceholder);
for (int componentIdx = 0; componentIdx < components.size();
componentIdx++) {
ArrayList<FileStatus> newCandidates =
new ArrayList<FileStatus>(candidates.size());
GlobFilter globFilter = new GlobFilter(components.get(componentIdx));
String component = unescapePathComponent(components.get(componentIdx));
if (globFilter.hasPattern()) {
sawWildcard = true;
}
if (candidates.isEmpty() && sawWildcard) {
// Optimization: if there are no more candidates left, stop examining
// the path components. We can only do this if we've already seen
// a wildcard component-- otherwise, we still need to visit all path
// components in case one of them is a wildcard.
break;
}
if ((componentIdx < components.size() - 1) &&
(!globFilter.hasPattern())) {
// Optimization: if this is not the terminal path component, and we
// are not matching against a glob, assume that it exists. If it
// doesn't exist, we'll find out later when resolving a later glob
// or the terminal path component.
for (FileStatus candidate : candidates) {
candidate.setPath(new Path(candidate.getPath(), component));
}
continue;
}
for (FileStatus candidate : candidates) {
if (globFilter.hasPattern()) {
// This is where the file system is actually queried for a wildcard
// component: the candidate's children are listed via Globber.listStatus.
FileStatus[] children = listStatus(candidate.getPath());
if (children.length == 1) {
// If we get back only one result, this could be either a listing
// of a directory with one entry, or it could reflect the fact
// that what we listed resolved to a file.
//
// Unfortunately, we can't just compare the returned paths to
// figure this out. Consider the case where you have /a/b, where
// b is a symlink to "..". In that case, listing /a/b will give
// back "/a/b" again. If we just went by returned pathname, we'd
// incorrectly conclude that /a/b was a file and should not match
// /a/*/*. So we use getFileStatus of the path we just listed to
// disambiguate.
if (!getFileStatus(candidate.getPath()).isDirectory()) {
continue;
}
}
for (FileStatus child : children) {
if (componentIdx < components.size() - 1) {
// Don't try to recurse into non-directories. See HADOOP-10957.
if (!child.isDirectory()) continue;
}
// Set the child path based on the parent path.
child.setPath(new Path(candidate.getPath(),
child.getPath().getName()));
if (globFilter.accept(child.getPath())) {
newCandidates.add(child);
}
}
} else {
// When dealing with non-glob components, use getFileStatus
// instead of listStatus. This is an optimization, but it also
// is necessary for correctness in HDFS, since there are some
// special HDFS directories like .reserved and .snapshot that are
// not visible to listStatus, but which do exist. (See HADOOP-9877)
// This is where the file system is actually queried for a literal
// component, via Globber.getFileStatus.
FileStatus childStatus = getFileStatus(
new Path(candidate.getPath(), component));
if (childStatus != null) {
newCandidates.add(childStatus);
}
}
}
candidates = newCandidates;
}
for (FileStatus status : candidates) {
// Use object equality to see if this status is the root placeholder.
// See the explanation for rootPlaceholder above for more information.
if (status == rootPlaceholder) {
status = getFileStatus(rootPlaceholder.getPath());
if (status == null) continue;
}
// HADOOP-3497 semantics: the user-defined filter is applied at the
// end, once the full path is built up.
if (filter.accept(status.getPath())) {
results.add(status);
}
}
}
/*
* When the input pattern "looks" like just a simple filename, and we
* can't find it, we return null rather than an empty array.
* This is a special case which the shell relies on.
*
* To be more precise: if there were no results, AND there were no
* groupings (aka brackets), and no wildcards in the input (aka stars),
* we return null.
*/
if ((!sawWildcard) && results.isEmpty() &&
(flattenedPatterns.size() <= 1)) {
return null;
}
return results.toArray(new FileStatus[0]);
}
调用到了Globber的getFileStatus和listStatus方法:
/**
 * Looks up the status of a path through {@code fs} when it is set and
 * through {@code fc} otherwise.
 *
 * @param path the path to look up
 * @return the status, or null when the lookup raised FileNotFoundException
 * @throws IOException for any other I/O failure
 */
private FileStatus getFileStatus(Path path) throws IOException {
  try {
    return (fs != null) ? fs.getFileStatus(path) : fc.getFileStatus(path);
  } catch (FileNotFoundException e) {
    // A missing path is reported as null rather than as an exception.
    return null;
  }
}
/**
 * Lists the entries under a path through {@code fs} when it is set and
 * through {@code fc.util()} otherwise.
 *
 * @param path the path to list
 * @return the listing, or an empty array when the listing raised
 *         FileNotFoundException
 * @throws IOException for any other I/O failure
 */
private FileStatus[] listStatus(Path path) throws IOException {
  try {
    return (fs != null) ? fs.listStatus(path) : fc.util().listStatus(path);
  } catch (FileNotFoundException e) {
    // A missing path is reported as "no children".
    return new FileStatus[0];
  }
}
至此,终于看到fs.listStatus了。即后面就是调用实际的FileSystem实现的getFileStatus和listStatus方法了,就不在此分析了。
*************分两部分:其次分析processArguments(expandArguments(args));********************
/**
 * Processes each expanded argument in order. An IOException raised for one
 * argument is reported via displayError and does not stop the remaining
 * arguments from being processed.
 *
 * @param args the expanded arguments
 * @throws IOException declared by the signature
 */
protected void processArguments(LinkedList<PathData> args)
    throws IOException {
  for (PathData pathArg : args) {
    try {
      processArgument(pathArg);
    } catch (IOException e) {
      displayError(e);
    }
  }
}
/**
 * Routes one argument according to whether its path exists.
 *
 * @param item the argument to process
 * @throws IOException if the chosen handler fails
 */
protected void processArgument(PathData item) throws IOException {
  if (!item.exists) {
    processNonexistentPath(item);
    return;
  }
  processPathArgument(item);
}
/**
 * Handles an argument whose path exists: resets the depth counter and
 * passes the item to processPaths, which leads to the formatted output.
 *
 * @param item the existing path argument
 * @throws IOException if processing the path fails
 */
protected void processPathArgument(PathData item) throws IOException {
  depth = 0;
  // null indicates that the call is not via recursion, ie. there is
  // no parent directory that was expanded
  final PathData noParent = null;
  processPaths(noParent, item);
}
/**
 * Handles an argument whose path does not exist by raising an error that
 * carries the item's string form, so the user gets a message.
 *
 * @param item the non-existent path argument
 * @throws PathNotFoundException always
 * @throws IOException declared by the signature
 */
protected void processNonexistentPath(PathData item) throws IOException {
  final String missingPath = item.toString();
  throw new PathNotFoundException(missingPath);
}
输出结果格式化的流程如下:
/**
 * Prints the "Found N items" header when appropriate, adjusts the column
 * widths for the given items and then delegates to the parent
 * implementation.
 *
 * @param parent the expanded parent directory, or null for a top-level call
 * @param items the items to list
 * @throws IOException if processing fails
 */
@Override
protected void processPaths(PathData parent, PathData ... items)
    throws IOException {
  // Header only for a non-empty, non-recursive listing under a parent.
  final boolean showHeader =
      parent != null && !isRecursive() && items.length != 0;
  if (showHeader) {
    out.println("Found " + items.length + " items");
  }
  // Compute the column widths and rebuild the format string.
  adjustColumnWidths(items);
  super.processPaths(parent, items);
}
接下来重新调整了一下列宽,最后调用了父类的processPaths方法:
/**
 * Processes each item in turn: handles the item itself, descends into it
 * when recursion is enabled and the item is recursable, then runs the
 * post-processing hook. An IOException for one item is reported via
 * displayError and the loop continues with the next item.
 *
 * @param parent the expanded parent directory, or null for a top-level call
 * @param items the items to process
 * @throws IOException declared by the signature
 */
protected void processPaths(PathData parent, PathData ... items)
    throws IOException {
  // TODO: this really should be iterative
  for (int idx = 0; idx < items.length; idx++) {
    final PathData current = items[idx];
    try {
      // Does the real work for this item (for ls: prints its line).
      processPath(current);
      // Descend only when recursion was requested (-R for ls).
      if (recursive && isPathRecursable(current)) {
        recursePath(current);
      }
      postProcessPath(current);
    } catch (IOException e) {
      displayError(e);
    }
  }
}
最后,来看一下打印具体每行信息的代码:
/**
 * Formats one listing line for the item using lineFormat and prints it.
 *
 * @param item the path item to print; its stat supplies the column values
 * @throws IOException declared by the signature
 */
@Override
protected void processPath(PathData item) throws IOException {
  final FileStatus status = item.stat;

  // "d" for a directory, "-" otherwise.
  final String typeFlag = status.isDirectory() ? "d" : "-";
  // Permission string, followed by "+" when the ACL bit is set.
  final String permissions =
      status.getPermission() + (status.getPermission().getAclBit() ? "+" : " ");
  // Replication is shown for files only.
  final Object replication = status.isFile() ? status.getReplication() : "-";
  final String owner = status.getOwner();
  final String group = status.getGroup();

  final String line = String.format(lineFormat,
      typeFlag,
      permissions,
      replication,
      owner,
      group,
      formatSize(status.getLen()),                                // size
      dateFormat.format(new Date(status.getModificationTime())), // date
      item                                                        // the path
  );
  out.println(line);
}
至此,命令hdfs dfs -ls的执行过程就结束了。
附录
1 path.getFileSystem(conf)
Path类
/**
 * Return the FileSystem that owns this Path.
 *
 * @param conf the configuration passed on to FileSystem.get
 * @return the FileSystem obtained for this path's URI
 * @throws IOException if the FileSystem cannot be obtained
 */
public FileSystem getFileSystem(Configuration conf) throws IOException {
  return FileSystem.get(
      this.toUri(),
      conf);
}
FileSystem类
/**
 * Returns the FileSystem for this URI's scheme and authority. The scheme
 * of the URI determines a configuration property name,
 * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class.
 * The entire URI is passed to the FileSystem instance's initialize method.
 *
 * @param uri the URI identifying the file system
 * @param conf the configuration to consult
 * @return the default file system when the URI has no authority and either
 *         no scheme or the default's scheme (with the default having an
 *         authority); otherwise a new or cached instance for the URI
 * @throws IOException if the FileSystem cannot be obtained
 */
public static FileSystem get(URI uri, Configuration conf) throws IOException {
  final String scheme = uri.getScheme();
  final String authority = uri.getAuthority();

  if (authority == null) {
    if (scheme == null) {
      // use default FS
      return get(conf);
    }
    // no authority: fall back to the default FS when its scheme matches
    // and the default itself carries an authority
    final URI defaultUri = getDefaultUri(conf);
    final boolean sameScheme = scheme.equals(defaultUri.getScheme());
    if (sameScheme && defaultUri.getAuthority() != null) {
      return get(defaultUri, conf);
    }
  }

  final String disableCacheName =
      String.format("fs.%s.impl.disable.cache", scheme);
  final boolean cacheDisabled = conf.getBoolean(disableCacheName, false);
  if (cacheDisabled) {
    // Creation of a FileSystem ultimately goes through createFileSystem.
    return createFileSystem(uri, conf);
  }
  // Instances are cached so that a new one need not be created reflectively
  // on every call. NOTE(review): the original note describes the cache as a
  // map from scheme to instance; the actual key is not visible here.
  return CACHE.get(uri, conf);
}
FileSystem类
/**
 * Creates and initialises a FileSystem for the given URI: resolves the
 * implementation class from the URI's scheme, instantiates it reflectively
 * and calls initialize on the new instance.
 *
 * @param uri the URI identifying the file system
 * @param conf the configuration used for class lookup and initialisation
 * @return the initialised instance
 * @throws IOException if the class cannot be resolved or initialisation fails
 */
private static FileSystem createFileSystem(URI uri, Configuration conf
    ) throws IOException {
  final Class<?> implClass = getFileSystemClass(uri.getScheme(), conf);
  final FileSystem instance =
      (FileSystem) ReflectionUtils.newInstance(implClass, conf);
  instance.initialize(uri, conf);
  return instance;
}
/**
 * Resolves the FileSystem implementation class for a scheme. Two sources
 * are consulted, in this order:
 * 1) the configuration property {@code fs.<scheme>.impl};
 * 2) the file systems registered through loadFileSystems(), used when the
 *    configuration has no entry.
 *
 * @param scheme the URI scheme
 * @param conf the configuration to consult; may be null
 * @return the implementation class for the scheme
 * @throws IOException if neither source knows the scheme
 */
public static Class<? extends FileSystem> getFileSystemClass(String scheme,
    Configuration conf) throws IOException {
  // Make sure the service-loaded file systems have been registered.
  if (!FILE_SYSTEMS_LOADED) {
    loadFileSystems();
  }

  Class<? extends FileSystem> resolved = null;
  if (conf != null) {
    // Source 1: the configuration.
    resolved = (Class<? extends FileSystem>) conf.getClass("fs." + scheme + ".impl", null);
  }
  if (resolved != null) {
    return resolved;
  }

  // Source 2: the registered file systems.
  resolved = SERVICE_FILE_SYSTEMS.get(scheme);
  if (resolved != null) {
    return resolved;
  }
  throw new IOException("No FileSystem for scheme: " + scheme);
}
/**
 * Registers the FileSystem implementations discovered by ServiceLoader,
 * keyed by the scheme each one reports. The work is done at most once,
 * under the FileSystem class lock.
 */
private static void loadFileSystems() {
  synchronized (FileSystem.class) {
    if (FILE_SYSTEMS_LOADED) {
      return; // already registered
    }
    final ServiceLoader<FileSystem> discovered =
        ServiceLoader.load(FileSystem.class);
    for (FileSystem candidate : discovered) {
      // Each implementation names its own scheme.
      SERVICE_FILE_SYSTEMS.put(candidate.getScheme(), candidate.getClass());
    }
    FILE_SYSTEMS_LOADED = true;
  }
}
最后就是fs的初始化了fs.initialize(uri, conf);,不同的FileSystem实现有不同的初始化过程,在此就不分析了。
2 HDFS中文件路径通配符支持
/**
 * File patterns.
 * Processing a batch of files in a single operation is a common need.
 * Matching several files with a wildcard expression is convenient, because
 * the input does not have to be listed file by file and directory by
 * directory; this is called "globbing".
 * Hadoop provides two FileSystem methods for globbing:
 *   public FileStatus[] globStatus(Path pathPattern) throws IOException {
 *     return new Globber(this, pathPattern, DEFAULT_FILTER).glob();
 *   }
 *   public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {
 *     return new Globber(this, pathPattern, filter).glob();
 *   }
 * globStatus returns the FileStatus objects of all files whose path matches
 * the given pattern, sorted by path. The optional PathFilter argument can
 * restrict the matches further.
 *
 * This example prints every path matching
 * /user/hdfs/MapReduce/data/&#42;/&#42;/{11,12}. Failures are printed to
 * stderr rather than propagated.
 */
public static void globbing() {
  String uri = "/user/hdfs/MapReduce/data";
  Configuration configuration = new Configuration();
  try {
    fileSystem = FileSystem.get(new URI(uri), configuration);
    try {
      // Sample layout: /2018/08/12 and /2017/08/11
      FileStatus[] matches = fileSystem.globStatus(new Path("/user/hdfs/MapReduce/data/*/*/{11,12}"));
      // 1./user/hdfs/MapReduce/data/201[78](201[7-8] 、 201[^01234569]) hdfs://fz/user/hdfs/MapReduce/data/2017 hdfs://fz/user/hdfs/MapReduce/data/2018
      // 2./user/hdfs/MapReduce/data/*/*/11 hdfs://fz/user/hdfs/MapReduce/data/2017/08/11
      // 3./user/hdfs/MapReduce/data/*/*/{11,12} hdfs://fz/user/hdfs/MapReduce/data/2017/08/11 hdfs://fz/user/hdfs/MapReduce/data/2018/08/12
      // globStatus returns null for a wildcard-free pattern that matches
      // nothing; the guard keeps the example safe if the pattern is edited.
      if (matches != null) {
        for (FileStatus match : matches) {
          System.out.println(match.getPath().toString());
        }
      }
    } finally {
      // Close even when globStatus throws, so the handle is not leaked.
      fileSystem.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
/**
 * PathFilter.
 * A glob pattern cannot always describe exactly the set of files we want to
 * access; excluding one particular file with a glob pattern, for example,
 * is generally not feasible. The listStatus and globStatus methods of
 * FileSystem accept an optional PathFilter object so that matching can be
 * controlled programmatically.
 *
 * This example prints every path matching
 * /user/hdfs/MapReduce/data/201[78]/&#42;/&#42; except those the exclude
 * filter rejects. Failures are printed to stderr rather than propagated.
 */
public static void pathFilter() {
  String uri = "/user/hdfs/MapReduce/data";
  Configuration configuration = new Configuration();
  try {
    fileSystem = FileSystem.get(new URI(uri), configuration);
    try {
      // Sample layout: /2018/08/12 and /2017/08/11, plus a newly added /2017/08/12
      FileStatus[] matches = fileSystem.globStatus(new Path("/user/hdfs/MapReduce/data/201[78]/*/*"), new RegexExcludePathFilter("^.*/2017/08/11$"));
      // Alternative form of the same example:
      //FileStatus[] fileStatus = fileSystem.globStatus(new Path("/user/hdfs/MapReduce/data/2017/*/*"), new RegexExcludePathFilter("/user/hdfs/MapReduce/data/2017/08/11"));
      // globStatus returns null for a wildcard-free pattern that matches
      // nothing; the guard keeps the example safe if the pattern is edited.
      if (matches != null) {
        for (FileStatus match : matches) {
          System.out.println(match.getPath().toString());
        }
      }
    } finally {
      // Close even when globStatus throws, so the handle is not leaked.
      fileSystem.close();
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}