HDFS 的 API 总体来说有两个主要的类:FileSystem 和 Configuration。
导入依赖
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
</dependencies>
(1)初始化HDFS
// NOTE(review): when submitting remotely without the Hadoop environment set up,
// the client sends the local OS username to the cluster; that user typically has
// no permissions on HDFS, which raises an access exception. Setting
// HADOOP_USER_NAME makes the client act as "root" instead.
System.setProperty("HADOOP_USER_NAME","root");
// Client-side configuration: NameNode address and desired replication factor.
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000");
conf.set("dfs.replication", "2");
// Obtain the FileSystem client handle; it must be closed when done (fs.close()).
FileSystem fs = FileSystem.get(conf);
(2) 上传文件
fs.copyFromLocalFile(new Path("D:\\abc\\wordcount\\input\\test.txt"),new Path("/wordcount/a.txt"));
(3)下载文件
fs.copyToLocalFile(new Path("/wordcount/a.txt"),new Path("D:\\abc\\wordcount\\input\\test2.txt"));
(4)创建
fs.create(new Path("/wordcount/b.txt"));
(5)删除
fs.delete(new Path("/wordcount/b.txt"));
(6)获取一个文件的副本数
// Get the actual replication factor of a specific file.
// Bug in the original: getDefaultReplication(path) returns the *default*
// replication the server would apply to NEW files at that path — not the
// replication recorded for an existing file. FileStatus.getReplication()
// reports the file's own value.
short i = fs.getFileStatus(new Path("/wordcount/b.txt")).getReplication();
System.out.println(i);
(7)获取所有的文件
// Recursively enumerate every file under the root directory; for each file,
// print its full path and then the placement info of each of its blocks.
RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
while (files.hasNext()) {
    LocatedFileStatus status = files.next();
    // Full HDFS path of this file.
    System.out.println(status.getPath());
    // Block locations come pre-attached to a LocatedFileStatus — no extra RPC.
    for (BlockLocation block : status.getBlockLocations()) {
        // Byte offset of this block within the file.
        System.out.println(block.getOffset());
        // Hostnames of the DataNodes holding a replica of this block.
        System.out.println(ArrayUtils.toString(block.getHosts()));
        // host:port identifiers of those DataNodes.
        System.out.println(ArrayUtils.toString(block.getNames()));
    }
}
// Release the client connection to the cluster.
fs.close();