package com.my.hdfsopt;

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Reports, for each immediate child of an HDFS directory, its total size in
 * bytes and its total (recursive) regular-file count.
 *
 * <p>Submit via shell. Do not pass the main class name on the command line —
 * it would be treated as an argument:
 * {@code yarn jar /app/m_user1/service/Hangzhou_HdfsFileMananger.jar /hive_tenant_account/hivedbname/}
 */
public class HdfsPathMonitor {

    /**
     * Entry point.
     *
     * @param args args[0] is the HDFS directory whose children are reported
     * @throws Exception on missing argument or filesystem access failure
     */
    public static void main(String[] args) throws Exception {
        System.out.println("the args is " + String.join(",", args));
        if (args.length < 1) {
            // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException.
            throw new IllegalArgumentException("Usage: HdfsPathMonitor <hdfs-dir-path>");
        }
        String dirPath = args[0];

        Configuration conf = new Configuration();
        // fs.defaultFS must point at the target cluster, e.g. hdfs://mycluster
        conf.set("fs.defaultFS", "hdfs://mycluster");

        // FileSystem.get() returns a JVM-wide cached instance. Close it exactly once,
        // when all work is done. The original code closed it inside the loop, which
        // invalidates the shared cached handle for every later iteration and for the
        // recursive listAll() calls ("Filesystem closed" errors).
        try (FileSystem fileSystem = FileSystem.get(conf)) {
            Path path = new Path(dirPath);
            FileStatus[] files = fileSystem.listStatus(path);
            if (files == null || files.length == 0) {
                throw new FileNotFoundException(
                        "Cannot access " + dirPath + ": No such file or directory.");
            }

            System.out.println("dirpath \t total file size \t total file count");
            for (FileStatus file : files) {
                Path childPath = file.getPath();
                String pathStr = childPath.toString();
                // getContentSummary already walks the subtree for the byte total.
                long totalSize = fileSystem.getContentSummary(childPath).getLength();
                long totalFileCount = listAll(conf, childPath);
                System.out.println(("".equals(pathStr) ? "." : pathStr)
                        + "\t" + totalSize + "\t" + totalFileCount);
            }
        }
    }

    /**
     * Recursively counts regular files under {@code path}.
     *
     * @param conf Hadoop configuration used to resolve the FileSystem
     * @param path directory (or file) to count under
     * @return number of regular files reachable from {@code path}; 0 if it does not exist
     * @throws IOException on filesystem access failure
     */
    public static Long listAll(Configuration conf, Path path) throws IOException {
        long totalFileCount = 0;
        // Do NOT close this FileSystem: FileSystem.get() hands back the shared cached
        // instance; closing it here would break the caller's handle mid-traversal.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(path)) {
            for (FileStatus stat : fs.listStatus(path)) {
                if (stat.isDirectory()) {
                    // Subdirectory: recurse and accumulate its file count.
                    totalFileCount += listAll(conf, stat.getPath());
                } else {
                    // Regular file.
                    totalFileCount++;
                }
            }
        }
        return totalFileCount;
    }
}