HDFS_FileSystem(scala)类介绍
FileSystem api: http://hadoop.apache.org/docs/current/api/org/apache/hadoop/fs/FileSystem.html
LocatedFileStatus 是 FileStatus 的子类
import org.apache.hadoop.fs.FileSystem
下面就对常用方法进行举例:
导入配置文件
首先,创建工程并导入hadoop配置文件
准备读取目录下的文件
listStatus
/**
* List the statuses of the files/directories in the given path if the path is
* a directory.
*
* @param f given path
* @return the statuses of the files/directories in the given path
* @throws FileNotFoundException when the path does not exist;
* IOException see specific implementation
*/
public abstract FileStatus[] listStatus(Path f) throws FileNotFoundException,
IOException;
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object TestHDFS {

  /**
   * Lists the direct children of a fixed HDFS directory with
   * `FileSystem.listStatus` (non-recursive) and prints the metadata
   * of each entry: files get length, path, modification time, access
   * time, and block size; directories get their path only.
   *
   * Throws FileNotFoundException if the path does not exist
   * (propagated from listStatus).
   */
  def main(args: Array[String]): Unit = {
    val HDFSPath = "/origin_data/cp_realtime/2020-12-25"
    // Configuration() picks up core-site.xml / hdfs-site.xml from the classpath.
    val fs = FileSystem.get(new Configuration())
    // listStatus returns only the immediate children of the directory.
    val files = fs.listStatus(new Path(HDFSPath))
    for (file <- files) {
      if (file.isFile) {
        println("=================================================")
        println("这是一个文件")
        println(s"file: $file") // full FileStatus toString (all metadata in one line)
        println("文件长度: " + file.getLen)
        println("文件路径: " + file.getPath)
        // FIX: getModificationTime is the *modification* time, not the creation time;
        // the label previously said "创建时间" (creation time) and lacked a colon.
        println("文件修改时间: " + file.getModificationTime)
        println("文件上次访问时间: " + file.getAccessTime)
        // `/` binds tighter than `+`, so this prints the block size in MB as intended.
        println("文件块大小: " + file.getBlockSize / 1024 / 1024 + "MB")
      } else if (file.isDirectory) {
        println("=================================================")
        println("这是文件夹")
        println("文件父路径: " + file.getPath)
      }
    }
  }
}
效果:
listFiles
/**
* List the statuses and block locations of the files in the given path.
*
* If the path is a directory,
* if recursive is false, returns files in the directory;
* if recursive is true, return files in the subtree rooted at the path.
* If the path is a file, return the file's status and block locations.
*
* @param f is the path
* @param recursive if the subdirectories need to be traversed recursively
*
* @return an iterator that traverses statuses of the files
*
* @throws FileNotFoundException when the path does not exist;
* IOException see specific implementation
*/
public RemoteIterator<LocatedFileStatus> listFiles(
final Path f, final boolean recursive)
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object TestHDFS2 {

  /**
   * Recursively walks every file under a fixed HDFS directory with
   * `FileSystem.listFiles(path, recursive = true)` and, for each file,
   * prints its path, its permission, and the hostname of every node
   * that stores one of its blocks.
   */
  def main(args: Array[String]): Unit = {
    val HDFSPath = "/origin_data/cp_realtime/2020-12-25"
    // Configuration() loads the Hadoop config files found on the classpath.
    val fs = FileSystem.get(new Configuration())
    // recursive = true: traverse the whole subtree rooted at HDFSPath;
    // with false only the directory's own files would be returned.
    val iter = fs.listFiles(new Path(HDFSPath), true)
    while (iter.hasNext) {
      val status = iter.next()
      println("文件路径:" + status.getPath)
      println("文件权限:" + status.getPermission)
      // One line per (block, replica-host) pair.
      for {
        loc  <- status.getBlockLocations
        node <- loc.getHosts
      } println("存储的主机节点: " + node)
    }
  }
}
效果: