This article uses Hadoop 2.6.4 as an example to walk through Hadoop development in Java.
It covers two main topics:
(1) Development environment setup
(2) Hadoop Java development examples
1. Create a Maven project
Dependencies in the pom.xml configuration file:
<dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.4</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.4</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13</version>
    </dependency>
</dependencies>
2. HDFS Java client development
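The fragments below all rely on the same set of imports from the JDK and hadoop-common, and they throw checked exceptions (IOException, InterruptedException, URISyntaxException), so each one should live in a method that declares or handles those:

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;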
2.1 Upload a file
Configuration configuration = new Configuration();
// Get a FileSystem handle, connecting to the NameNode as user "hadoop"
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop");
// Upload the local file to HDFS
fs.copyFromLocalFile(new Path("E://hadoop/helloword.txt"),
        new Path("/0423/helloword.txt"));
fs.close();
System.out.println("Upload finished");
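Since the pom already includes JUnit, each fragment can be run directly as a test method. A minimal sketch wrapping the upload code (the class and method names here are illustrative):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class HdfsClientTest {

    @Test
    public void upload() throws Exception {
        Configuration configuration = new Configuration();
        // Connect to the NameNode as user "hadoop"
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop");
        fs.copyFromLocalFile(new Path("E://hadoop/helloword.txt"),
                new Path("/0423/helloword.txt"));
        fs.close();
    }
}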
2.2 Download a file
Configuration configuration = new Configuration();
// Get a FileSystem handle, connecting to the NameNode as user "hadoop"
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop");
// Copy the file to the local file system
/*
 * copyToLocalFile arguments:
 * arg 1: whether to delete the source file on HDFS
 * arg 2: source path on HDFS
 * arg 3: destination path on the local file system
 * arg 4: whether to use RawLocalFileSystem; when true, no .crc checksum file is written locally
 */
fs.copyToLocalFile(false, new Path("/studentInfo_201701.txt"),
        new Path("E://hadoop/studentInfo_201701_down.txt"), true);
// Close the file system
fs.close();
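FileSystem implements java.io.Closeable, so instead of calling fs.close() by hand, the handle can be managed with try-with-resources, which closes it even if the copy throws. A sketch of the same download in that form:

Configuration configuration = new Configuration();
try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop")) {
    fs.copyToLocalFile(false, new Path("/studentInfo_201701.txt"),
            new Path("E://hadoop/studentInfo_201701_down.txt"), true);
}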
2.3 Create a directory
Configuration configuration = new Configuration();
// Get a FileSystem handle, connecting to the NameNode as user "hadoop"
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop");
// Create the directory, including any missing parents (like mkdir -p)
fs.mkdirs(new Path("/0423"));
fs.close();
2.4 Delete a directory
Configuration configuration = new Configuration();
// Get a FileSystem handle, connecting to the NameNode as user "hadoop"
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop");
/*
 * delete arguments:
 * arg 1: HDFS path to delete
 * arg 2: whether to delete recursively (must be true for a non-empty directory)
 */
fs.delete(new Path("/0423"), true);
fs.close();
2.5 Rename a file
Configuration configuration = new Configuration();
// Get a FileSystem handle, connecting to the NameNode as user "hadoop"
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop");
// rename returns true on success
fs.rename(new Path("/0423/helloword.txt"), new Path("/0423/helloword1.txt"));
fs.close();
2.6 View basic file information
Configuration configuration = new Configuration();
// Get a FileSystem handle, connecting to the NameNode as user "hadoop"
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop");
/*
 * listFiles returns an iterator over the files under a path
 * arg 1: HDFS path
 * arg 2: whether to recurse into subdirectories
 */
RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/0423"), true);
while (listFiles.hasNext()) {
    LocatedFileStatus lfs = listFiles.next();
    // File name
    String fileName = lfs.getPath().getName();
    System.out.println("name: " + fileName);
    System.out.println("group: " + lfs.getGroup());
    System.out.println("length: " + lfs.getLen());
    System.out.println("permission: " + lfs.getPermission());
    // Blocks that hold the file's data, and the hosts storing each block
    BlockLocation[] blockLocations = lfs.getBlockLocations();
    for (BlockLocation block : blockLocations) {
        String[] hosts = block.getHosts();
        for (String host : hosts) {
            System.out.println(host);
        }
    }
}
fs.close();
2.7 Distinguish files from directories
Configuration configuration = new Configuration();
// Get a FileSystem handle, connecting to the NameNode as user "hadoop"
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.33.17:9000"), configuration, "hadoop");
// List the status of each entry directly under the path
FileStatus[] listStatus = fs.listStatus(new Path("/0423"));
for (FileStatus status : listStatus) {
    if (status.isFile()) {
        System.out.println("f:" + status.getPath().getName());
    } else {
        System.out.println("D:" + status.getPath().getName());
    }
}
fs.close();
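Note that listStatus only returns the entries directly under the given path. To walk the whole tree while still telling files and directories apart, a small recursive helper works; a sketch (the method name walk is illustrative):

private static void walk(FileSystem fs, Path dir) throws IOException {
    for (FileStatus status : fs.listStatus(dir)) {
        if (status.isFile()) {
            System.out.println("f:" + status.getPath().getName());
        } else {
            System.out.println("D:" + status.getPath().getName());
            // Descend into the subdirectory
            walk(fs, status.getPath());
        }
    }
}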