当我们在命令行打如下命令:hadoop fs -ls .
我们知道将显示出文件系统中的根目录下的文件,那究竟在代码里如何执行的呢,今天就一看究竟。
当我们按下这条命令,首先找到bin目录下的hadoop脚本,其中有一行脚本:
elif [ "$COMMAND" = "fs" ] ; thenCLASS=org.apache.hadoop.fs.FsShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
可以看到执行的是FsShell这个类的main方法:
public static void main(String argv[]) throws Exception {
FsShell shell = new FsShell();
int res;
try {
res = ToolRunner.run(shell, argv);
} finally {
shell.close();
}
System.exit(res);
}
res = ToolRunner.run(shell, argv);这个方法会调用FsShell的run方法:
FsShell的run方法很长,我这里只截取根fs这个参数有关的代码:
else if ("-ls".equals(cmd)) {
if (i < argv.length) {
exitCode = doall(cmd, argv, i);
} else {
exitCode = ls(Path.CUR_DIR, false);
}
}
也就是调用ls方法:
private int ls(String srcf, boolean recursive) throws IOException {
Path srcPath = new Path(srcf);
FileSystem srcFs = srcPath.getFileSystem(this.getConf());
FileStatus[] srcs = srcFs.globStatus(srcPath);
if (srcs==null || srcs.length==0) {
throw new FileNotFoundException("Cannot access " + srcf +
": No such file or directory.");
}
boolean printHeader = (srcs.length == 1) ? true: false;
int numOfErrors = 0;
for(int i=0; i<srcs.length; i++) {
numOfErrors += ls(srcs[i], srcFs, recursive, printHeader);
}
return numOfErrors == 0 ? 0 : -1;
}
其中会调用 ls(srcs[i], srcFs, recursive, printHeader);
private int ls(FileStatus src, FileSystem srcFs, boolean recursive,
boolean printHeader) throws IOException {
final String cmd = recursive? "lsr": "ls";
final FileStatus[] items = shellListStatus(cmd, srcFs, src);
if (items == null) {
return 1;
} else {
int numOfErrors = 0;
if (!recursive && printHeader) {
if (items.length != 0) {
System.out.println("Found " + items.length + " items");
}
}
int maxReplication = 3, maxLen = 10, maxOwner = 0,maxGroup = 0;
for(int i = 0; i < items.length; i++) {
FileStatus stat = items[i];
int replication = String.valueOf(stat.getReplication()).length();
int len = String.valueOf(stat.getLen()).length();
int owner = String.valueOf(stat.getOwner()).length();
int group = String.valueOf(stat.getGroup()).length();
if (replication > maxReplication) maxReplication = replication;
if (len > maxLen) maxLen = len;
if (owner > maxOwner) maxOwner = owner;
if (group > maxGroup) maxGroup = group;
}
for (int i = 0; i < items.length; i++) {
FileStatus stat = items[i];
Path cur = stat.getPath();
String mdate = dateForm.format(new Date(stat.getModificationTime()));
System.out.print((stat.isDir() ? "d" : "-") +
stat.getPermission() + " ");
System.out.printf("%"+ maxReplication +
"s ", (!stat.isDir() ? stat.getReplication() : "-"));
if (maxOwner > 0)
System.out.printf("%-"+ maxOwner + "s ", stat.getOwner());
if (maxGroup > 0)
System.out.printf("%-"+ maxGroup + "s ", stat.getGroup());
System.out.printf("%"+ maxLen + "d ", stat.getLen());
System.out.print(mdate + " ");
System.out.println(cur.toUri().getPath());
if (recursive && stat.isDir()) {
numOfErrors += ls(stat,srcFs, recursive, printHeader);
}
}
return numOfErrors;
}
}
很简单,就是将目录下的文件的信息打印出来,如果recursive为true的话,递归地打印出这个子孙文件