HDFS Common API Usage and Programming
1 Add Dependencies to the pom File
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0-cdh5.16.2</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
</dependencies>
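Note: CDH-flavored artifacts such as 2.6.0-cdh5.16.2 are not published to Maven Central, so resolution will most likely also need the Cloudera repository declared in the pom:

<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>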
2 Calling the HDFS API
2.1 Creating a Directory
package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    @Test
    public void mkdir() throws Exception {
        boolean res = fileSystem.mkdirs(new Path("/demo"));
        System.out.println(res);
    }
}
Equivalent to hadoop fs -mkdir / hdfs dfs -mkdir
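Incidentally, mkdirs creates any missing parent directories in one call (like hadoop fs -mkdir -p), and an overload accepts an explicit permission. A minimal sketch, reusing the fileSystem field from the test class above:

// Requires: import org.apache.hadoop.fs.permission.FsPermission;
@Test
public void mkdirWithPermission() throws Exception {
    // Creates /demo/a/b in one call, including the missing parents
    boolean res = fileSystem.mkdirs(new Path("/demo/a/b"), new FsPermission((short) 0755));
    System.out.println(res);
}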
2.2 Uploading a Local File to HDFS
2.2.1 Using the HDFS API
package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    @Test
    public void copyFromLocalFile() throws Exception {
        // Source path (local)
        Path src = new Path("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo.txt");
        // Destination path (HDFS)
        Path dst = new Path("/demo/demo.txt");
        fileSystem.copyFromLocalFile(src, dst);
    }
}
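The two-argument copyFromLocalFile keeps the local source and overwrites an existing target. When either behavior needs to be controlled explicitly, FileSystem provides an overload; a minimal sketch for the same test class:

@Test
public void copyFromLocalFileExplicit() throws Exception {
    Path src = new Path("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo.txt");
    Path dst = new Path("/demo/demo.txt");
    // delSrc = false: keep the local source; overwrite = true: replace an existing target
    fileSystem.copyFromLocalFile(false, true, src, dst);
}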
2.2.2 Uploading a File with IO Streams
package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    @Test
    public void copyFromLocalFileByIo() throws Exception {
        // Create (or overwrite) the target file on HDFS
        FSDataOutputStream outputStream = fileSystem.create(new Path("/demo/demo2.txt"), true);
        // Buffered stream over the local source file
        BufferedInputStream inputStream = new BufferedInputStream(
                new FileInputStream(new File("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo.txt")));
        // Copy in 2048-byte chunks
        IOUtils.copyBytes(inputStream, outputStream, 2048);
        IOUtils.closeStream(outputStream);
        IOUtils.closeStream(inputStream);
    }
}
Equivalent to hadoop fs -put / hdfs dfs -put
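Closing the streams by hand works, but try-with-resources is less error-prone. The sketch below (assuming Java 8+ for the lambda) also passes a Progressable so the client prints a dot as the upload makes progress:

@Test
public void copyFromLocalFileByIoTryWithResources() throws Exception {
    try (BufferedInputStream in = new BufferedInputStream(
            new FileInputStream("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo.txt"));
         FSDataOutputStream out = fileSystem.create(new Path("/demo/demo2.txt"),
                 () -> System.out.print("."))) { // org.apache.hadoop.util.Progressable callback
        IOUtils.copyBytes(in, out, 2048);
    } // both streams are closed automatically, even on failure
}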
2.3 Downloading a File from HDFS
2.3.1 Using the HDFS API
package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    @Test
    public void copyToLocalFile() throws Exception {
        // Source path (HDFS)
        Path src = new Path("/demo/demo.txt");
        // Destination path (local)
        Path dst = new Path("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo1.txt");
        fileSystem.copyToLocalFile(src, dst);
    }
}
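On Windows, copyToLocalFile goes through the checksummed local file system and may require the Hadoop native binaries (winutils). A variant that writes through the raw local file system, skipping the .crc sidecar file, can sidestep this; a sketch:

@Test
public void copyToLocalFileRaw() throws Exception {
    Path src = new Path("/demo/demo.txt");
    Path dst = new Path("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo1.txt");
    // delSrc = false: keep the HDFS copy; useRawLocalFileSystem = true: no .crc file
    fileSystem.copyToLocalFile(false, src, dst, true);
}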
2.3.2 Using IO Streams
package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.*;
import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    @Test
    public void copyToLocalFileByIo() throws Exception {
        FSDataInputStream inputStream = fileSystem.open(new Path("/demo/demo1.txt"));
        BufferedOutputStream outputStream = new BufferedOutputStream(
                new FileOutputStream(new File("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo2.txt")));
        IOUtils.copyBytes(inputStream, outputStream, 2048);
        IOUtils.closeStream(inputStream);
        IOUtils.closeStream(outputStream);
    }
}
Equivalent to hadoop fs -get / hdfs dfs -get
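One advantage of the stream API over -get is random access: FSDataInputStream is seekable, so a read can start at an arbitrary byte offset. A minimal sketch:

@Test
public void readFromOffset() throws Exception {
    try (FSDataInputStream in = fileSystem.open(new Path("/demo/demo1.txt"))) {
        in.seek(10); // skip the first 10 bytes
        IOUtils.copyBytes(in, System.out, 2048, false); // false: leave System.out open
    }
}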
2.4 Renaming a File on HDFS
package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    @Test
    public void reName() throws Exception {
        // Original path
        Path src = new Path("/demo/demo.txt");
        // New path
        Path dst = new Path("/demo/demo1.txt");
        boolean res = fileSystem.rename(src, dst);
        System.out.println(res);
    }
}
Equivalent to hadoop fs -mv / hdfs dfs -mv
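Note that rename reports failure through its boolean return rather than an exception, e.g. when the source is missing or the destination already exists, so checking the preconditions first makes failures easier to diagnose. A sketch:

@Test
public void reNameChecked() throws Exception {
    Path src = new Path("/demo/demo.txt");
    Path dst = new Path("/demo/demo1.txt");
    if (fileSystem.exists(src) && !fileSystem.exists(dst)) {
        System.out.println(fileSystem.rename(src, dst));
    } else {
        System.out.println("source missing or destination already exists");
    }
}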
2.5 Listing Files
package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    @Test
    public void listFiles() throws Exception {
        /**
         * Path f : path to list
         * boolean recursive : whether to recurse into subdirectories
         */
        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/demo"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            String path = file.getPath().toString().trim();
            String isDir = file.isDirectory() ? "directory" : "file";
            String owner = file.getOwner();
            System.out.println(isDir + "\t" + path + "\t" + owner);
            // Get the block locations of this file
            BlockLocation[] blockLocations = file.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                // Get the hosts holding each replica of the block
                String[] hosts = blockLocation.getHosts();
                for (String host : hosts) {
                    System.out.println(host);
                }
            }
        }
    }
}
Equivalent to hadoop fs -ls / hdfs dfs -ls (with recursive = true, closer to -ls -R; note that listFiles returns files only, not directories)
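Because listFiles returns only files, a listing that matches the shell's -ls output (directories included) is better served by listStatus, which returns the immediate children one level deep. A minimal sketch:

@Test
public void listStatus() throws Exception {
    // Immediate children of /demo, directories included (non-recursive)
    FileStatus[] statuses = fileSystem.listStatus(new Path("/demo"));
    for (FileStatus status : statuses) {
        String type = status.isDirectory() ? "directory" : "file";
        System.out.println(type + "\t" + status.getPath() + "\t" + status.getOwner());
    }
}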
2.6 Deleting a File or Directory
package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    @Test
    public void delete() throws Exception {
        /**
         * Path f : path to delete
         * boolean recursive : whether to delete recursively
         */
        boolean res = fileSystem.delete(new Path("/demo"), true);
        System.out.println(res);
    }
}
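Also note that FileSystem.delete removes data immediately, bypassing the HDFS trash that hadoop fs -rm uses when fs.trash.interval > 0. To keep that safety net programmatically, the Trash helper can be used; a sketch, assuming trash is enabled on the cluster:

// Requires: import org.apache.hadoop.fs.Trash;
@Test
public void deleteToTrash() throws Exception {
    // Moves /demo into the user's .Trash instead of deleting it outright;
    // returns false when trash is disabled (fs.trash.interval = 0)
    boolean moved = Trash.moveToAppropriateTrash(fileSystem, new Path("/demo"), fileSystem.getConf());
    System.out.println(moved);
}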
3 HDFS API Case Study
3.1 Requirements
Use the HDFS API to wrap up a richer reName method.
Effect:
/bigdata/hdfs-works/20211001
/1.txt
/2.txt
/3.txt
/bigdata/hdfs-works/20211002
/1.txt
/2.txt
/3.txt
==>
/bigdata/hdfs-works/20211001-1.txt
/bigdata/hdfs-works/20211001-2.txt
/bigdata/hdfs-works/20211001-3.txt
/bigdata/hdfs-works/20211002-1.txt
/bigdata/hdfs-works/20211002-2.txt
/bigdata/hdfs-works/20211002-3.txt
3.2 Code
3.2.1 HDFSUtils
package com.xk.bigdata.hadoop.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

import java.net.URI;

public class HDFSUtils {

    FileSystem fileSystem = null;

    /**
     * Initialize the file system
     */
    public void stepUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI(FinalCode.HDFS_URI);
        fileSystem = FileSystem.get(uri, conf, FinalCode.HDFS_USER_NAME);
    }

    /**
     * Close the file system
     */
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    /**
     * Create a directory
     *
     * @param path : directory to create
     */
    public boolean mkdir(String path) throws Exception {
        return fileSystem.mkdirs(new Path(path));
    }

    /**
     * Upload a file from the local file system
     *
     * @param srcPath : source path (local)
     * @param dstPath : destination path (HDFS)
     */
    public void copyFromLocalFile(String srcPath, String dstPath) throws Exception {
        Path src = new Path(srcPath);
        Path dst = new Path(dstPath);
        fileSystem.copyFromLocalFile(src, dst);
    }

    /**
     * Download a file from HDFS
     *
     * @param srcPath : source path (HDFS)
     * @param dstPath : destination path (local)
     */
    public void copyToLocalFile(String srcPath, String dstPath) throws Exception {
        Path src = new Path(srcPath);
        Path dst = new Path(dstPath);
        fileSystem.copyToLocalFile(src, dst);
    }

    /**
     * Rename a file
     *
     * @param oldPath : original path
     * @param newPath : new path
     */
    public boolean reName(String oldPath, String newPath) throws Exception {
        Path src = new Path(oldPath);
        Path dst = new Path(newPath);
        return fileSystem.rename(src, dst);
    }

    /**
     * List files
     *
     * @param pathString : path to list
     * @param recursive  : whether to recurse into subdirectories
     */
    public RemoteIterator<LocatedFileStatus> listFiles(String pathString, boolean recursive) throws Exception {
        return fileSystem.listFiles(new Path(pathString), recursive);
    }

    /**
     * List files (recursive by default)
     *
     * @param pathString : path to list
     */
    public RemoteIterator<LocatedFileStatus> listFiles(String pathString) throws Exception {
        return fileSystem.listFiles(new Path(pathString), true);
    }

    /**
     * Delete a file or directory
     *
     * @param pathString : path to delete
     * @param recursive  : whether to delete recursively
     */
    public boolean delete(String pathString, boolean recursive) throws Exception {
        return fileSystem.delete(new Path(pathString), recursive);
    }

    /**
     * Delete a file or directory (recursive by default)
     *
     * @param pathString : path to delete
     */
    public boolean delete(String pathString) throws Exception {
        return fileSystem.delete(new Path(pathString), true);
    }

    /**
     * Check whether a path exists
     *
     * @param pathString : path to check
     */
    public boolean isExist(String pathString) throws Exception {
        return fileSystem.exists(new Path(pathString));
    }
}
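A quick smoke test of the wrapper might look like the following (HDFSUtilsDemo is a hypothetical driver class, not part of the original project):

package com.xk.bigdata.hadoop.hdfs;

import com.xk.bigdata.hadoop.utils.HDFSUtils;

public class HDFSUtilsDemo {

    public static void main(String[] args) throws Exception {
        HDFSUtils hdfsUtils = new HDFSUtils();
        try {
            hdfsUtils.stepUp();
            System.out.println(hdfsUtils.mkdir("/demo"));      // true on success
            if (hdfsUtils.isExist("/demo")) {
                System.out.println(hdfsUtils.delete("/demo")); // recursive delete
            }
        } finally {
            hdfsUtils.cleanUp();
        }
    }
}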
3.2.2 FinalCode
package com.xk.bigdata.hadoop.utils;

public class FinalCode {

    // HDFS parameters
    public final static String HDFS_URI = "hdfs://bigdatatest02:8020";
    public final static String HDFS_USER_NAME = "hdfs";
}
3.2.3 HDSFReName
package com.xk.bigdata.hadoop.hdfs;

import com.xk.bigdata.hadoop.utils.HDFSUtils;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;

public class HDSFReName {

    /**
     * 1. Recursively collect the paths of all files under the given directory
     * 2. Rename each file, folding the directory name into the file name
     * 3. Delete the original directory
     */
    public static void reName(String pathString) {
        HDFSUtils hdfsUtils = new HDFSUtils();
        try {
            hdfsUtils.stepUp();
            RemoteIterator<LocatedFileStatus> files = hdfsUtils.listFiles(pathString);
            while (files.hasNext()) {
                LocatedFileStatus file = files.next();
                String filePath = file.getPath().toString();
                // e.g. /bigdata/hdfs-works/20211001/1.txt -> /bigdata/hdfs-works/20211001-1.txt
                String newFilePath = filePath.substring(0, filePath.lastIndexOf("/"))
                        + "-" + filePath.substring(filePath.lastIndexOf("/") + 1);
                boolean res = hdfsUtils.reName(filePath, newFilePath);
                System.out.println(res);
            }
            // The directory is empty by now; remove it
            System.out.println(hdfsUtils.delete(pathString));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                hdfsUtils.cleanUp();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        // Process both date directories from the requirement
        reName("/bigdata/hdfs-works/20211001");
        reName("/bigdata/hdfs-works/20211002");
    }
}