文章目录
一. maven 依赖
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the HDFS client example project (com.gao:HDFSClient). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.gao</groupId>
<artifactId>HDFSClient</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- Compile with Java 8 as both source and target level. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<!-- Hadoop client libraries; all three Hadoop artifacts are kept on the same version (3.1.3). -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>3.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs-client</artifactId>
<version>3.1.3</version>
</dependency>
<!-- JUnit 4 test framework. NOTE(review): no test scope is declared, so it is on the compile classpath; confirm this is intended. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
<!-- SLF4J binding for Log4j 1.2. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.30</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Pins the compiler plugin version and sets Java 1.8 source/target, the same level as the properties above. -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Runs the assembly plugin's "single" goal in the package phase using the jar-with-dependencies descriptor. -->
<!-- NOTE(review): no plugin version is pinned here, unlike the compiler plugin above. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
二. HDFS常见API使用说明
HDFS客户端代码常用套路
1、获取一个客户端对象
2、执行相关的操作命令
3、关闭资源
参数优先级
(优先级由低到高)hdfs-default.xml < hdfs-site.xml < 项目资源目录下的配置文件 < 代码里面的配置
1. 初始化一个FileSystem
private FileSystem fs;

/**
 * Opens the {@link FileSystem} handle used by the tests, based on the
 * {@code fs.defaultFS} address set below.
 */
@Before
public void init() throws URISyntaxException, IOException, InterruptedException {
    final Configuration hdfsConf = new Configuration();
    // Address of the cluster NameNode, used as the default file system.
    hdfsConf.set("fs.defaultFS", "hdfs://localhost:9000");
    this.fs = FileSystem.get(hdfsConf);
}
/**
 * Releases the file system handle held in {@code fs}.
 *
 * <p>JUnit runs {@code @After} methods even when a {@code @Before} method threw, in which
 * case {@code fs} may never have been assigned. The null check keeps this method from
 * throwing a {@link NullPointerException} that would be reported alongside the original
 * setup failure.
 *
 * @throws IOException if closing the file system fails
 */
@After
public void close() throws IOException {
    if (fs != null) {
        fs.close();
    }
}
2. 创建目录
/**
 * Creates the directory {@code /flink/lib} through {@code FileSystem#mkdirs}.
 */
@Test
public void testmkdir() throws IOException {
    final Path target = new Path("/flink/lib");
    fs.mkdirs(target);
}
3. 上传文件
/**
 * Uploads a local file to HDFS.
 *
 * <p>The destination is written without scheme or authority so that it resolves against
 * the {@code fs.defaultFS} that {@code fs} was created with. A fully qualified
 * {@code hdfs://hadoop102/...} path names a different authority than the configured
 * {@code hdfs://localhost:9000} and is rejected with a "Wrong FS" IllegalArgumentException.
 *
 * @throws IOException if the copy fails
 */
@Test
public void testPut() throws IOException {
    // Arguments: 1) delete the local source after copying, 2) overwrite an existing destination,
    //            3) local source path, 4) destination path on HDFS.
    fs.copyFromLocalFile(
            false,
            true,
            new Path("D:\\sunwukong.txt"),
            new Path("/xiyou/huaguoshan"));
}
/**
 * Creates the HDFS file {@code /input} and writes a short text into it through an
 * output stream.
 *
 * @throws IOException if the file cannot be created or written
 */
@Test
public void testPut2() throws IOException {
    // try-with-resources closes the stream even when write() throws.
    try (FSDataOutputStream fos = fs.create(new Path("/input"))) {
        // Encode explicitly instead of relying on the platform default charset.
        fos.write("hello world".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }
}
4. 下载文件
/**
 * Downloads the HDFS file {@code /a.txt} to the local {@code D:\} drive.
 *
 * <p>The source is written without scheme or authority so that it resolves against the
 * {@code fs.defaultFS} that {@code fs} was created with. A fully qualified
 * {@code hdfs://hadoop102/...} path names a different authority than the configured
 * {@code hdfs://localhost:9000} and is rejected with a "Wrong FS" IllegalArgumentException.
 *
 * @throws IOException if the copy fails
 */
@Test
public void testGet() throws IOException {
    // Arguments: 1) delete the HDFS source after copying, 2) source path on HDFS,
    //            3) local destination path, 4) whether to use RawLocalFileSystem as the
    //            local file system (the raw file system does not write .crc checksum files).
    // Variant that moves a directory and uses the raw local file system:
    // fs.copyToLocalFile(true, new Path("/xiyou/huaguoshan/"), new Path("D:\\"), true);
    fs.copyToLocalFile(
            false,
            new Path("/a.txt"),
            new Path("D:\\"),
            false);
}
5. 删除
/**
 * Deletes the directory {@code /jinguo} together with its content.
 */
@Test
public void testRm() throws IOException {
    // delete(path, recursive): first argument is the path to remove, second is whether to
    // delete recursively.
    // Other variants:
    //   a single file:      fs.delete(new Path("/jdk-8u212-linux-x64.tar.gz"), false);
    //   an empty directory: fs.delete(new Path("/xiyou"), false);
    final Path nonEmptyDir = new Path("/jinguo");
    final boolean recursive = true;
    fs.delete(nonEmptyDir, recursive);
}
6. 文件和文件夹的移动和重命名
/**
 * Renames {@code /input} to {@code /output}.
 */
@Test
public void testmv() throws IOException {
    // rename(source, destination): first argument is the existing path, second the new path.
    // Other variants:
    //   rename a file in place:  fs.rename(new Path("/input/word.txt"), new Path("/input/ss.txt"));
    //   move and rename a file:  fs.rename(new Path("/input/ss.txt"), new Path("/cls.txt"));
    final Path from = new Path("/input");
    final Path to = new Path("/output");
    fs.rename(from, to);
}
7. 获取文件详情
/**
 * Lists every file under {@code /} (recursive flag set to {@code true}) and prints its
 * metadata followed by its block locations.
 */
@Test
public void fileDetail() throws IOException {
    final Path root = new Path("/");
    for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(root, true); it.hasNext(); ) {
        final LocatedFileStatus entry = it.next();
        final Path entryPath = entry.getPath();

        // Header line with the full path, then one attribute per line.
        System.out.println("==========" + entryPath + "=========");
        System.out.println(entry.getPermission());
        System.out.println(entry.getOwner());
        System.out.println(entry.getGroup());
        System.out.println(entry.getLen());
        System.out.println(entry.getModificationTime());
        System.out.println(entry.getReplication());
        System.out.println(entry.getBlockSize());
        System.out.println(entryPath.getName());

        // Block information for this file.
        final BlockLocation[] blocks = entry.getBlockLocations();
        System.out.println(Arrays.toString(blocks));
    }
}
8. 判断是文件还是文件夹
/**
 * Lists the direct children of {@code /} and prints each name, prefixed with a label
 * that tells whether the entry is a file or a directory.
 */
@Test
public void testFile() throws IOException {
    for (FileStatus entry : fs.listStatus(new Path("/"))) {
        // Anything that is not a file is reported with the directory label.
        final String label = entry.isFile() ? "文件:" : "目录:";
        System.out.println(label + entry.getPath().getName());
    }
}