Implementing HDFS file operations in Java
Create a resources folder under src/main.
1. Add a log4j.properties file there so the program's execution is logged:
log4j.rootLogger=INFO, stdout, logfile
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
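With this ConversionPattern (%d %p [%c] - %m%n), each log line should look roughly like 2020-12-01 10:00:00,000 INFO [org.apache.hadoop.fs.FileSystem] - ... (timestamp, level, logger name, message).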
2. List the files under an HDFS directory from Java:
package cn.bright.kgc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

/**
 * @Author Bright
 * @Date 2020/12/1
 * @Description Recursively list the files under an HDFS directory.
 */
public class HDFSListFiles {
    @Test
    public void listFiles() throws URISyntaxException, IOException, InterruptedException {
        // 1. Get a FileSystem connection
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop001:9000");
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:9000"), conf, "root");
        // 2. Iterate over the files under /kgc (the second argument enables recursion)
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/kgc"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus status = listFiles.next();
            System.out.println(status.getPath().getName()); // file name
            System.out.println(status.getGroup());          // owning group
            System.out.println(status.getLen());            // length in bytes
            System.out.println(status.getReplication());    // replication factor
            System.out.println("----------------------------------");
            // 3. Print each file's block locations
            BlockLocation[] blockLocations = status.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                // Arrays.toString prints the host names; printing the array directly
                // would only print its hash code
                System.out.println(Arrays.toString(blockLocation.getHosts()));
                System.out.println(blockLocation.getLength());
            }
        }
        // 4. Close the connection
        fs.close();
    }
}
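Note that listFiles only returns files. If you also want to see the directories under a path, fs.listStatus (non-recursive) works; here is a minimal sketch under the same connection assumptions as above (the class name HDFSListStatus is mine, not from the original):

package cn.bright.kgc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class HDFSListStatus {
    @Test
    public void listStatus() throws URISyntaxException, IOException, InterruptedException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:9000"), conf, "root");
        // listStatus returns the direct children of /kgc, directories included
        FileStatus[] statuses = fs.listStatus(new Path("/kgc"));
        for (FileStatus status : statuses) {
            System.out.println((status.isDirectory() ? "d: " : "f: ") + status.getPath().getName());
        }
        fs.close();
    }
}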
3. Create directories and upload/download files on HDFS from Java:
package cn.bright.kgc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * @Author Bright
 * @Date 2020/12/1
 * @Description Create a directory on HDFS, then upload and download a file.
 */
public class HDFSClientDemo {
    @Test
    public void testMkdirs() throws IOException, URISyntaxException, InterruptedException {
        // 1. Get a FileSystem connection
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop001:9000");
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:9000"), conf, "root");
        // 2. Create a directory, equivalent to: hdfs dfs -mkdir -p /kgc/hdfs
        fs.mkdirs(new Path("/kgc/hdfs"));
        // 3. Close the connection
        fs.close();
    }

    @Test
    public void testPutFileToHDFS() throws URISyntaxException, IOException, InterruptedException {
        // 1. Get a FileSystem connection; replicate uploaded files only once
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:9000"), conf, "root");
        // 2. Upload a local file, equivalent to: hdfs dfs -put data/test.txt /kgc/hdfs
        fs.copyFromLocalFile(new Path("data/test.txt"), new Path("/kgc/hdfs"));
        // 3. Close the connection
        fs.close();
    }

    @Test
    public void testGetFileFromHDFS() throws URISyntaxException, IOException, InterruptedException {
        // 1. Get a FileSystem connection
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:9000"), conf, "root");
        // 2. Download a file, equivalent to: hdfs dfs -get /kgc/hdfs/test.txt data/test1.txt
        fs.copyToLocalFile(new Path("/kgc/hdfs/test.txt"), new Path("data/test1.txt"));
        // 3. Close the connection
        fs.close();
    }
}
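The same FileSystem handle also covers renaming and deleting. Here is a minimal sketch of one more @Test method that could be added to HDFSClientDemo, assuming the same cluster address and user (the method name testRenameAndDelete is mine, not from the original):

@Test
public void testRenameAndDelete() throws URISyntaxException, IOException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(new URI("hdfs://hadoop001:9000"), conf, "root");
    // Rename, equivalent to: hdfs dfs -mv /kgc/hdfs/test.txt /kgc/hdfs/test2.txt
    fs.rename(new Path("/kgc/hdfs/test.txt"), new Path("/kgc/hdfs/test2.txt"));
    // Delete, equivalent to: hdfs dfs -rm -r /kgc/hdfs (the second argument enables recursion)
    fs.delete(new Path("/kgc/hdfs"), true);
    fs.close();
}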
4. Maven dependencies:
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.8.2</version>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
        <version>1.7.21</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-auth</artifactId>
        <version>2.6.0</version>
    </dependency>
</dependencies>
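These coordinates target a Hadoop 2.6.0 cluster; if your cluster runs a different release, align the org.apache.hadoop artifact versions with it. junit is declared without test scope because, as in step 1, the demo classes live under src/main rather than src/test.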