一、递归遍历HDFS并筛选文件
1-1、对于本地文件系统
/**
 * Tells whether a path names a log file, judged purely by its textual form.
 *
 * The check is case-insensitive: the path's string form is lower-cased and
 * must end with ".log". The file system is never consulted, so a directory
 * whose name ends in ".log" also matches.
 *
 * @param path the path to inspect
 * @return {@code true} if the lower-cased path string ends with ".log"
 */
public static boolean logFilter(Path path){
    final String lowered = path.toString().toLowerCase();
    final boolean hasLogSuffix = lowered.endsWith(".log");
    return hasLogSuffix;
}
/**
 * Recursively walks the local directory tree rooted at {@code path} and
 * collects every entry accepted by {@code Utils.logFilter}.
 *
 * @param path root of the tree to walk, as a local file-system path string
 * @return the set of matching paths found under {@code path}
 * @throws IOException if the starting path cannot be accessed
 */
public static Set<Path> listLogs(String path) throws IOException {
    // Files.walk keeps directory handles open until the stream is closed,
    // so the stream must live inside try-with-resources. collect() is a
    // terminal operation, so the result is fully built before the close.
    try (java.util.stream.Stream<Path> entries = Files.walk(Paths.get(path))) {
        return entries
                .filter(Utils::logFilter)
                .collect(Collectors.toSet());
    }
}
1-2、对于HDFS文件系统
def traverseDir(hdconf: Configuration, path: String, recursive: Boolean, filePaths: StringBuffer) {
val files = FileSystem.get(hdconf).listStatus(new Path(path))
files.foreach {
fStatus => {
if (!fStatus.isDirectory && fStatus