列出Hadoop文件夹下面的所有文件,如果子文件是一个文件夹,则递归列出里面的文件,返回一个Path数组。
import java.io.IOException
import org.apache.hadoop.fs.{FileSystem, FileStatus, Path}
import org.apache.hadoop.conf.Configuration
/** Lists every regular file found beneath a Hadoop path, descending into sub-directories. */
object FullFile {

  /**
   * Recursively collects the paths of all regular files beneath `status`.
   *
   * Directories are descended into depth-first, in the order returned by
   * `FileSystem.listStatus`. Entries that report neither `isDirectory` nor
   * `isFile` are skipped. This method only builds the result; it performs no
   * printing (callers decide what to do with the paths).
   *
   * @param status status of the directory (or file) to start listing from
   * @param fs     file system used for every listing call; it must be the file
   *               system that owns `status.getPath`
   * @return the paths of all files found, in traversal order
   * @throws IOException if a listing call fails
   */
  @throws[IOException]
  def recursiveListFiles(status: FileStatus, fs: FileSystem): Array[Path] =
    // Use the caller-supplied file system for the whole traversal instead of
    // building a new Configuration and re-resolving it for every directory.
    fs.listStatus(status.getPath).flatMap { child =>
      if (child.isDirectory) recursiveListFiles(child, fs)
      else if (child.isFile) Array(child.getPath)
      else Array.empty[Path]
    }

  /**
   * Command-line entry point: prints, one per line, every file beneath the
   * path given as the first argument.
   *
   * @param args command-line arguments; `args(0)` is the root path to list
   */
  def main(args: Array[String]): Unit =
    args.headOption match {
      case Some(root) =>
        val rootPath = new Path(root)
        // Resolve the file system from the path itself so that fully-qualified
        // URIs on a non-default file system are handled as well.
        val fs = rootPath.getFileSystem(new Configuration())
        // Print once, here, rather than at every recursion level.
        recursiveListFiles(fs.getFileStatus(rootPath), fs).foreach(println)
      case None =>
        System.err.println("Usage: FullFile <path>")
        sys.exit(1)
    }
}