一、创建工程
二、创建maven工程并配置参数
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the HDFS demo project; all Hadoop artifacts share one version property. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.soft863</groupId>
<artifactId>hdfsdemo</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile for Java 8 source/target. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<!-- Single knob controlling the version of every Hadoop dependency below. -->
<hadoop.version>3.2.0</hadoop.version>
</properties>
<dependencies>
<!-- NOTE(review): junit is only used by test-style code (HDFSAPI2's @Test methods);
     if those classes live under src/test, add <scope>test</scope> - confirm module layout. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
<!-- NOTE(review): slf4j-log4j12 1.6.4 is quite old relative to Hadoop 3.2.0;
     verify no SLF4J binding conflict with the logging jars Hadoop pulls in. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.4</version>
</dependency>
<!-- HDFS client-side classes (DistributedFileSystem, etc.). -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Core Hadoop APIs: Configuration, FileSystem, Path, IOUtils. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- MapReduce client jars, needed later for the WordCount job referenced by mainClass below. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- NOTE(review): hadoop-client aggregates hdfs/common/mapreduce artifacts; the explicit
     entries above may be redundant with it - confirm before pruning. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Builds a fat "jar-with-dependencies" at the package phase so the job can be
     submitted with a single jar. NOTE(review): 2.2-beta-5 is an ancient beta of the
     assembly plugin; a stable release should work identically - verify before upgrading. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.2-beta-5</version>
<configuration>
<archive>
<manifest>
<!-- Entry point written into the jar manifest (MapReduce driver defined elsewhere). -->
<mainClass>
com.soft863.mr.WordCountDriver
</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>
jar-with-dependencies
</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<!-- Bind the single-assembly goal to "mvn package". -->
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
三、通过API操作HDFS
操作前置条件:
在windows系统下D:\data路径创建test1.txt文本,并在文本中添加文字(任何文本都可以)
创建com.soft863.hdfs包
创建HDFSAPI1类
package com.soft863.hdfs;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
public class HDFSAPI1 {

    public static void main(String[] args) throws IOException {
        start();
    }

    /**
     * Demonstrates the basic HDFS {@link FileSystem} API against the cluster at
     * hdfs://hadoop100:9000: mkdir, delete, rename, upload, download, and two
     * styles of directory listing.
     *
     * @throws IOException if any HDFS operation fails
     */
    private static void start() throws IOException {
        // new Configuration() loads hdfs-default.xml from the jar, then
        // hdfs-site.xml from the classpath; explicit set() calls override both.
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://hadoop100:9000");
        configuration.set("dfs.replication", "3");

        // try-with-resources guarantees the FileSystem handle is released even if
        // an operation throws (the original only closed it on the success path).
        try (FileSystem fs = FileSystem.get(configuration)) {
            System.out.println(fs.toString());

            // Create a directory, then delete it recursively, then recreate it.
            Path newPath = new Path("hdfs://hadoop100:9000/soft863");
            fs.mkdirs(newPath);
            fs.delete(newPath, true);
            fs.mkdirs(newPath);

            // Rename /soft863 to /bigdata.
            Path newPath1 = new Path("hdfs://hadoop100:9000/bigdata");
            fs.rename(newPath, newPath1);

            // Upload the local file into the /bigdata directory.
            Path sourcePath = new Path("D:\\data\\test1.txt");
            Path targetPath = new Path("hdfs://hadoop100:9000/bigdata");
            fs.copyFromLocalFile(sourcePath, targetPath);

            // Download the uploaded FILE. The original passed the /bigdata
            // directory as the source, which copies the whole directory and
            // creates a local directory named test01.txt instead of a file.
            Path remoteFile = new Path("hdfs://hadoop100:9000/bigdata/test1.txt");
            Path localTarget = new Path("D:\\data\\test01.txt");
            fs.copyToLocalFile(remoteFile, localTarget);

            // Shallow listing: one FileStatus per direct child of /bigdata.
            FileStatus[] statuses = fs.listStatus(new Path("/bigdata"));
            for (FileStatus status : statuses) {
                System.out.println(status);
            }

            // Recursive listing: files only, with block-location details.
            RemoteIterator<LocatedFileStatus> listFiles =
                    fs.listFiles(new Path("hdfs://hadoop100:9000/bigdata"), true);
            while (listFiles.hasNext()) {
                LocatedFileStatus fileStatus = listFiles.next();
                System.out.println(fileStatus.getPath().getName());
                System.out.println(fileStatus.getBlockSize());
                System.out.println(fileStatus.getPermission());
                System.out.println(fileStatus.getLen());
                BlockLocation[] blockLocations = fileStatus.getBlockLocations();
                for (BlockLocation bl : blockLocations) {
                    System.out.println("block-offset:" + bl.getOffset());
                    // Print every datanode host that stores a replica of this block.
                    String[] hosts = bl.getHosts();
                    for (String host : hosts) {
                        System.out.println(host);
                    }
                }
            }
        }
    }
}
步骤执行结果:
四、通过IO流操作HDFS
在com.soft863.hdfs包下面创建HDFSAPI2类
package com.soft863.hdfs;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.net.URI;
public class HDFSAPI2 {

    /**
     * Uploads the local file D:\data\test1.txt to HDFS as /bigdata/test2.txt
     * using raw stream copying instead of copyFromLocalFile.
     *
     * @throws Exception if the filesystem cannot be reached or the copy fails;
     *                   letting it propagate makes a failed upload fail the test
     *                   (the original swallowed it with printStackTrace, so a
     *                   broken run still reported green)
     */
    @Test
    public void putFileToHDFS() throws Exception {
        // 1. Configuration + FileSystem bound to the target cluster.
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop100:9000"), configuration);
        FileInputStream inStream = null;
        FSDataOutputStream outStream = null;
        try {
            // 2. Local source stream.
            inStream = new FileInputStream(new File("D:\\data\\test1.txt"));
            // 3-4. HDFS destination stream.
            Path writePath = new Path("hdfs://hadoop100:9000/bigdata/test2.txt");
            outStream = fs.create(writePath);
            // 5. Copy 4 KiB at a time; false = we close the streams ourselves.
            IOUtils.copyBytes(inStream, outStream, 4096, false);
        } finally {
            // closeStream is null-safe, so partially-opened states are fine.
            IOUtils.closeStream(inStream);
            IOUtils.closeStream(outStream);
            // Bug fix: the original never closed the FileSystem handle.
            fs.close();
        }
    }

    /**
     * Downloads /bigdata/test2.txt from HDFS to the local file
     * D:\data\test02.txt using raw stream copying.
     *
     * @throws Exception if the filesystem cannot be reached or the copy fails
     */
    @Test
    public void getFileToHDFS() throws Exception {
        // 1. Configuration + FileSystem bound to the target cluster.
        Configuration configuration = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop100:9000"), configuration);
        FSDataInputStream inStream = null;
        FileOutputStream fos = null;
        try {
            // 2-4. HDFS source stream and local destination stream.
            Path readPath = new Path("hdfs://hadoop100:9000/bigdata/test2.txt");
            inStream = fs.open(readPath);
            fos = new FileOutputStream("D:\\data\\test02.txt");
            // 5. Copy 4 KiB at a time; false = we close the streams ourselves.
            IOUtils.copyBytes(inStream, fos, 4096, false);
        } finally {
            IOUtils.closeStream(inStream);
            IOUtils.closeStream(fos);
            // Bug fix: the original never closed the FileSystem handle.
            fs.close();
        }
    }
}