I. Common HDFS Classes
See the Hadoop environment setup guide (linked separately) for the cluster used below.
- Configuration: loads configuration files; a static block loads core-default.xml by default, and its values can be overridden with a custom core-site.xml
- FileSystem: abstract base class for file systems; FileSystem.get(Configuration conf) returns either a local or a distributed file system, depending on the value of fs.defaultFS
- FileStatus: basic metadata about a file, such as its Path, length, and block size
- BlockLocation: location information for one block of a file
- FSDataInputStream: input stream, created via FileSystem.open(Path f)
- FSDataOutputStream: output stream, created via FileSystem.create(Path f, boolean overwrite)
- IOUtils: utility class shipped with Hadoop; see the stream-copy sketch right after this list
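To make the three stream-related classes concrete, here is a minimal sketch of a byte-level copy inside HDFS. It assumes the hadoop101:9000 cluster configured below; the paths /test/test.zip and /test/test-copy.zip and the class name StreamCopyDemo are placeholders for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class StreamCopyDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        try (FileSystem fs = FileSystem.get(conf);
             // open() returns an FSDataInputStream, create() an FSDataOutputStream
             FSDataInputStream in = fs.open(new Path("/test/test.zip"));
             FSDataOutputStream out = fs.create(new Path("/test/test-copy.zip"), true)) {
            // copy with a 4 KB buffer; false = let try-with-resources close the streams
            IOUtils.copyBytes(in, out, 4096, false);
        }
    }
}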
II. Code Walkthrough
1. Add the dependencies to pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Note: this module inherits from a parent that already pulls in Lombok and Spring Boot -->
    <parent>
        <artifactId>wgco-hadoop</artifactId>
        <groupId>com.wc</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>HDFS</artifactId>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.8.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.2</version>
        </dependency>
    </dependencies>
</project>
2. Add core-site.xml under resources, pointing fs.defaultFS at the distributed file system
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- Address of the HDFS NameNode -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop101:9000</value>
    </property>
</configuration>
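If you prefer not to ship a core-site.xml, the same setting can be made in code, since values set on a Configuration override what was loaded from the classpath. A minimal sketch; the class name ConfDemo is just for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class ConfDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // overrides whatever core-default.xml / core-site.xml loaded
        conf.set("fs.defaultFS", "hdfs://hadoop101:9000");
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println(fs.getUri()); // hdfs://hadoop101:9000
        }
    }
}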
3. Add log4j.properties under resources
# console appender, attached to the root logger
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# file appender; add "logfile" to log4j.rootLogger above to enable it
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
4. Write the test class
package com.wc.hdfs.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;

public class TestHDFS {

    // Configuration loads config files in a static block: without a
    // core-site.xml on the classpath, only core-default.xml is read
    private Configuration conf = new Configuration();

    // client-side FileSystem object
    private FileSystem fileSystem;

    /**
     * Open the connection.
     *
     * @throws IOException
     */
    @Before
    public void init() throws IOException {
        // For convenience the HDFS tree was opened up with
        // hadoop fs -chmod -R 777 /. If you skip that step, use the
        // get(uri, conf, user) overload below instead, or the client
        // user will be denied permission.
        fileSystem = FileSystem.get(conf);
        //fileSystem = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "wangchao");
        System.out.println(fileSystem.getClass().getName() + "--connected");
    }

    /**
     * Close the connection.
     *
     * @throws IOException
     */
    @After
    public void close() throws IOException {
        if (fileSystem != null) {
            fileSystem.close();
            System.out.println(fileSystem.getClass().getName() + "--connection closed");
        }
    }

    /**
     * Create a directory: hadoop fs -mkdir HDFSPath
     *
     * @throws IOException
     */
    @Test
    public void mkdir() throws IOException {
        fileSystem.mkdirs(new Path("/test"));
    }

    /**
     * Upload a file: hadoop fs -put localFile HDFSPath
     *
     * @throws IOException
     */
    @Test
    public void upload() throws IOException {
        // delSrc = false (keep the local file), overwrite = true
        fileSystem.copyFromLocalFile(false, true,
                new Path("F:\\test.zip"), new Path("/test"));
    }

    /**
     * Download a file: hadoop fs -get HDFSPath localFile
     *
     * @throws IOException
     */
    @Test
    public void download() throws IOException {
        // useRawLocalFileSystem = true skips writing a local .crc checksum file
        fileSystem.copyToLocalFile(false, new Path("/test"),
                new Path("F:\\download"), true);
    }

    /**
     * Rename: hadoop fs -mv oldName newName
     *
     * @throws IOException
     */
    @Test
    public void rename() throws IOException {
        fileSystem.rename(new Path("/test"), new Path("/testRename"));
    }

    /**
     * Delete: hadoop fs -rm -r -f HDFSPath
     *
     * @throws IOException
     */
    @Test
    public void delete() throws IOException {
        // recursive = true, so directories are removed with their contents
        fileSystem.delete(new Path("/testRename"), true);
    }

    /**
     * Check existence.
     *
     * @throws IOException
     */
    @Test
    public void exists() throws IOException {
        System.out.println("exists: " + fileSystem.exists(new Path("/test")));
    }

    /**
     * File or directory?
     *
     * @throws IOException
     */
    @Test
    public void isDirectoryOrIsFile() throws IOException {
        Path path = new Path("/test");
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        System.out.println(path + " is a directory: " + fileStatus.isDirectory());
        System.out.println(path + " is a file: " + fileStatus.isFile());
        // also check the immediate children of the path
        FileStatus[] fileStatuses = fileSystem.listStatus(path);
        for (FileStatus temp : fileStatuses) {
            Path filePath = temp.getPath();
            System.out.println(filePath + " is a directory: " + temp.isDirectory());
            System.out.println(filePath + " is a file: " + temp.isFile());
        }
    }

    /**
     * Get the block locations of the files under a path.
     *
     * @throws IOException
     */
    @Test
    public void fileBlockLocations() throws IOException {
        Path path = new Path("/test");
        // listLocatedStatus returns each file together with its block info
        RemoteIterator<LocatedFileStatus> iterator = fileSystem.listLocatedStatus(path);
        while (iterator.hasNext()) {
            LocatedFileStatus locatedFileStatus = iterator.next();
            System.out.println("name----" + locatedFileStatus.getPath().getName());
            System.out.println("len----" + locatedFileStatus.getLen());
            // one BlockLocation per HDFS block of the file
            BlockLocation[] blockLocations = locatedFileStatus.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                System.out.println("block: " + blockLocation);
            }
            System.out.println("--------------------");
        }
    }
}
5. Notes
To run this directly on Windows, you also need a Hadoop environment on the Windows machine (HADOOP_HOME pointing at an unpacked distribution whose bin directory contains winutils.exe). The simpler route is to package the code as a jar and run it on Linux.
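If you do stay on Windows, one widely used workaround is to set the hadoop.home.dir system property before any FileSystem call, since Hadoop's Shell class checks that property as well as the HADOOP_HOME variable. A sketch; the class name and the F:\hadoop-2.7.2 path are placeholders:

public class WindowsSetup {
    static {
        // must run before any Configuration/FileSystem code; the directory's
        // bin\ folder must contain winutils.exe
        System.setProperty("hadoop.home.dir", "F:\\hadoop-2.7.2"); // placeholder path
    }
}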
If you spot any mistakes, please point them out. Thanks!