文章目录
SpringBoot 整合HDFS
1 window本地测试前置操作
- 本地安装hadoop
百度云盘下载
提取码:ai7v
2 整合代码
2.1 pom
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.3</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.30</version>
</dependency>
2.2 src/main/resources下log4j.properties
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
2.3 src/main/resources下hdfs-site.xml
该文件默认加载
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
2.4 code
- application.properties
## hdfs 配置
# hdfs 连接url
nameNode.url=hdfs://127.0.0.1:8020
# 用户名
hdfs.userName=root
# 操作hdfs的存储节点
hdfs.dataNode=/xxx/xxx/dataNodeFile/
- HdfsConfig配置文件
/**
 * HDFS connection settings bound from application.properties.
 */
@Configuration
@Data
public class HdfsConfig {
// nameNode connection URL for the HDFS cluster
@Value("${nameNode.url}")
private String nameNodeUrl;
// user identity used for HDFS operations
@Value("${hdfs.userName}")
private String hdfsUserName;
// storage (data-node) path operated on; NOTE(review): the "pdf" prefix looks like a leftover — confirm with callers before renaming
@Value("${hdfs.dataNode}")
private String pdfDataNode;
}
- 工具类 util
@Component
public class HdfsUtil {

    public static final Logger logger = LoggerFactory.getLogger(HdfsUtil.class);

    @Autowired
    private HdfsConfig hdfsConfig;

    /**
     * Builds the client-side HDFS configuration.
     * Precedence: this Configuration > hdfs-site.xml under resources > hdfs-default.xml on the server.
     *
     * @return the configured {@link Configuration}
     */
    private Configuration getConfiguration() {
        Configuration configuration = new Configuration();
        configuration.set("dfs.support.append", "true");
        // Dev/single-node clusters have no spare datanode to swap in on a write
        // failure, so disable the replacement policy to avoid aborted writes.
        configuration.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
        configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        return configuration;
    }

    /**
     * Opens an HDFS {@link FileSystem} handle.
     * <p>
     * The client operates under a user identity; by default the HDFS client API reads it
     * from the JVM (-DHADOOP_USER_NAME=hadoop), but it can also be passed explicitly when
     * constructing the fs object, as done here with the configured user name.
     *
     * @return an open file system handle (the caller is responsible for closing it)
     * @throws Exception when the nameNode URL is invalid or the connection fails
     */
    public FileSystem getFileSystem() throws Exception {
        return FileSystem.get(new URI(hdfsConfig.getNameNodeUrl()), getConfiguration(),
                hdfsConfig.getHdfsUserName());
    }

    /**
     * Creates a directory on HDFS. Returns true when it already exists.
     *
     * @param path directory to create
     * @return true when the directory exists or was created successfully
     * @throws Exception when closing the file system fails
     */
    public boolean mkdir(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return false;
        }
        boolean isOk = false;
        FileSystem fs = null;
        try {
            if (existFile(path)) {
                // Already present — not an error, so don't log at ERROR level.
                logger.warn("hdfs file is exists: {}", path);
                return true;
            }
            fs = getFileSystem();
            Path srcPath = new Path(path);
            isOk = fs.mkdirs(srcPath);
            logger.info("hdfs mkdir success: {}", path);
        } catch (Exception e) {
            // Throwable as the LAST argument so SLF4J prints the full stack trace
            // (a "{}" filled by the exception loses it).
            logger.error("hdfs mkdir {}", path, e);
        } finally {
            if (fs != null) {
                fs.close();
            }
        }
        return isOk;
    }

    /**
     * Checks whether a path exists on HDFS.
     *
     * @param path path to test
     * @return true when the path exists; false when blank, missing, or on error
     * @throws Exception when closing the file system fails
     */
    public boolean existFile(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return false;
        }
        boolean isExists = false;
        FileSystem fs = null;
        try {
            fs = getFileSystem();
            isExists = fs.exists(new Path(path));
        } catch (Exception e) {
            logger.error("existFile {}", path, e);
        } finally {
            if (fs != null) {
                fs.close();
            }
        }
        return isExists;
    }

    /**
     * Lists the direct children of an HDFS directory.
     *
     * @param path directory to read
     * @return one map per child with keys "filePath" and "fileStatus", or null when
     *         the path is blank/missing, the directory is empty, or an error occurred
     * @throws Exception when the existence pre-check fails
     */
    public List<Map<String, Object>> readPathInfo(String path) throws Exception {
        if (StringUtils.isEmpty(path) || !existFile(path)) {
            return null;
        }
        FileSystem fs = null;
        try {
            fs = getFileSystem();
            FileStatus[] statusList = fs.listStatus(new Path(path));
            if (null != statusList && statusList.length > 0) {
                List<Map<String, Object>> list = new ArrayList<>();
                for (FileStatus fileStatus : statusList) {
                    Map<String, Object> map = new HashMap<>();
                    map.put("filePath", fileStatus.getPath());
                    map.put("fileStatus", fileStatus.toString());
                    list.add(map);
                }
                return list;
            }
        } catch (Exception e) {
            logger.error("hdfs readPathInfo {}", path, e);
        } finally {
            // The original version leaked this handle: always close it.
            if (fs != null) {
                fs.close();
            }
        }
        return null;
    }

    /**
     * Creates a file on HDFS from an uploaded multipart file, stored as
     * path/originalFilename.
     *
     * @param path target directory on HDFS
     * @param file uploaded content; ignored when null or empty
     * @throws Exception when the write fails
     */
    public void createFile(String path, MultipartFile file) throws Exception {
        // Read the payload once instead of calling getBytes() twice.
        byte[] data = (file == null) ? null : file.getBytes();
        if (StringUtils.isEmpty(path) || null == data) {
            return;
        }
        FileSystem fs = null;
        FSDataOutputStream outputStream = null;
        try {
            fs = getFileSystem();
            String fileName = file.getOriginalFilename();
            // The upload lands in the given directory, keeping the original file name.
            Path newPath = new Path(path + "/" + fileName);
            outputStream = fs.create(newPath);
            outputStream.write(data);
            outputStream.flush();
        } finally {
            if (outputStream != null) {
                outputStream.close();
            }
            if (fs != null) {
                fs.close();
            }
        }
    }

    /**
     * Streams a remote HDFS file straight into the given output stream
     * (e.g. a servlet response for a browser download).
     *
     * @param os   destination stream; flushed but not closed here
     * @param path remote HDFS file path
     * @throws Exception when the file cannot be opened or copied
     */
    public void writeOutputStreamFile(OutputStream os, String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return;
        }
        FileSystem fs = null;
        FSDataInputStream inputStream = null;
        try {
            fs = getFileSystem();
            inputStream = fs.open(new Path(path));
            fileDownload(os, new BufferedInputStream(inputStream));
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
            if (fs != null) {
                fs.close();
            }
        }
    }

    /**
     * Reads a text file from HDFS into a single string (line separators are dropped).
     *
     * @param path remote file path
     * @return the concatenated file content, or null when the path is blank or missing
     * @throws Exception when the read fails
     */
    public String readFile(String path) throws Exception {
        if (StringUtils.isEmpty(path) || !existFile(path)) {
            return null;
        }
        FileSystem fs = null;
        FSDataInputStream inputStream = null;
        try {
            fs = getFileSystem();
            inputStream = fs.open(new Path(path));
            // Decode explicitly as UTF-8: relying on the platform default charset
            // garbles Chinese (and any non-ASCII) content.
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
            StringBuilder sb = new StringBuilder();
            String lineTxt;
            while ((lineTxt = reader.readLine()) != null) {
                sb.append(lineTxt);
            }
            return sb.toString();
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
            if (fs != null) {
                fs.close();
            }
        }
    }

    /**
     * Recursively lists every file below a path.
     *
     * @param path root directory
     * @return one map per file with keys "fileName" and "filePath", or null when
     *         the path is blank/missing or an error occurred
     * @throws Exception when the existence pre-check fails
     */
    public List<Map<String, String>> listFile(String path) throws Exception {
        if (StringUtils.isEmpty(path) || !existFile(path)) {
            return null;
        }
        FileSystem fs = null;
        try {
            fs = getFileSystem();
            // true = descend into sub-directories
            RemoteIterator<LocatedFileStatus> filesList = fs.listFiles(new Path(path), true);
            List<Map<String, String>> returnList = new ArrayList<>();
            while (filesList.hasNext()) {
                LocatedFileStatus next = filesList.next();
                Map<String, String> map = new HashMap<>();
                map.put("fileName", next.getPath().getName());
                map.put("filePath", next.getPath().toString());
                returnList.add(map);
            }
            return returnList;
        } catch (Exception e) {
            logger.error("hdfs listFile {}", path, e);
        } finally {
            if (fs != null) {
                fs.close();
            }
        }
        return null;
    }

    /**
     * Renames (moves) a file on HDFS.
     *
     * @param oldName current path
     * @param newName target path
     * @return true when the rename succeeded
     * @throws Exception when closing the file system fails
     */
    public boolean renameFile(String oldName, String newName) throws Exception {
        if (StringUtils.isEmpty(oldName) || StringUtils.isEmpty(newName)) {
            return false;
        }
        boolean isOk = false;
        FileSystem fs = null;
        try {
            fs = getFileSystem();
            isOk = fs.rename(new Path(oldName), new Path(newName));
        } catch (Exception e) {
            logger.error("hdfs renameFile {} -> {}", oldName, newName, e);
        } finally {
            if (fs != null) {
                fs.close();
            }
        }
        return isOk;
    }

    /**
     * Deletes an HDFS path (recursively, matching the old deleteOnExit semantics).
     *
     * @param path path to delete
     * @return true when the path was actually deleted
     * @throws Exception when the existence pre-check fails
     */
    public boolean deleteFile(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return false;
        }
        boolean isOk = false;
        FileSystem fs = null;
        try {
            if (!existFile(path)) {
                return false;
            }
            fs = getFileSystem();
            // Delete immediately and report the real outcome; deleteOnExit only
            // removed the path as a side effect of fs.close() and its return value
            // meant "registered", not "deleted".
            isOk = fs.delete(new Path(path), true);
        } catch (Exception e) {
            logger.error("hdfs deleteFile {}", path, e);
        } finally {
            if (fs != null) {
                fs.close();
            }
        }
        return isOk;
    }

    /**
     * Uploads a local file to HDFS.
     *
     * @param path       full path of the local source file
     * @param uploadPath target path on HDFS
     * @throws Exception when closing the file system fails
     */
    public void uploadFile(String path, String uploadPath) throws Exception {
        if (StringUtils.isEmpty(path) || StringUtils.isEmpty(uploadPath)) {
            return;
        }
        FileSystem fs = null;
        try {
            fs = getFileSystem();
            Path clientPath = new Path(path);
            Path serverPath = new Path(uploadPath);
            // First argument: whether to delete the source file (false = keep it).
            fs.copyFromLocalFile(false, clientPath, serverPath);
        } catch (Exception e) {
            logger.error("hdfs uploadFile {}", path, e);
        } finally {
            if (fs != null) {
                fs.close();
            }
        }
    }

    /**
     * Downloads an HDFS file to the local file system.
     *
     * @param path         source path on HDFS
     * @param downloadPath local destination path
     * @throws Exception when closing the file system fails
     */
    public void downloadFile(String path, String downloadPath) throws Exception {
        if (StringUtils.isEmpty(path) || StringUtils.isEmpty(downloadPath)) {
            return;
        }
        FileSystem fs = null;
        try {
            fs = getFileSystem();
            Path clientPath = new Path(path);
            Path serverPath = new Path(downloadPath);
            // First argument: whether to delete the source file (false = keep it).
            fs.copyToLocalFile(false, clientPath, serverPath);
        } catch (Exception e) {
            logger.error("hdfs downloadFile {}", path, e);
        } finally {
            if (fs != null) {
                fs.close();
            }
        }
    }

    /**
     * Copies every byte from the buffered input into the output stream.
     * The input stream is always closed; the output stream is flushed but left open.
     *
     * @param os  destination (e.g. response) stream
     * @param bis source stream
     * @throws Exception when the copy fails
     */
    private void fileDownload(OutputStream os, BufferedInputStream bis) throws Exception {
        if (bis == null) {
            return;
        }
        try {
            byte[] buff = new byte[1024];
            int n;
            while ((n = bis.read(buff)) != -1) {
                os.write(buff, 0, n);
                os.flush();
            }
        } finally {
            try {
                bis.close();
            } catch (IOException e) {
                // Log instead of printStackTrace so the failure reaches the app logs.
                logger.error("fileDownload close failed", e);
            }
        }
    }
}
2.5 code - v2
package com.pupu.bigdata.stream.utils;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.ObjectUtil;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
*
*/
@Slf4j
public class DynamicHDFSClient {
// hdfs client
private FileSystem fs;
Configuration conf;
public DynamicHDFSClient(String hadoopConfig) {
// hadoopConfig路径
/* 设置hdfs连接参数 */
conf = new Configuration();
if (StringUtils.isBlank(hadoopConfig)) {
log.info("add resources hadoop default config.");
//本地测试加载
conf.addResource("core-site.xml");
conf.addResource("hdfs-site.xml");
} else {
log.info("add {} hadoop config.", hadoopConfig);
conf.addResource(new Path(hadoopConfig, "core-site.xml"));
conf.addResource(new Path(hadoopConfig, "hdfs-site.xml"));
}
// 本地测试记得把这个加上!(如果不加此配置,会导致映射不到,出现上传下载卡住超时异常)
// conf.set("dfs.client.use.datanode.hostname", "true");
// 缓存 fs, 避免多 datanode 异常:Caused by: java.io.IOException: Filesystem closed
conf.setBoolean("fs.hdfs.impl.disable.cache", true);
/* get client */
try {
log.info("get hdfs client");
fs = FileSystem.get(conf);
log.info("get hdfs client success");
} catch (Exception e) {
log.info("get hdfs client error: {}", Throwables.getStackTraceAsString(e));
}
}
/* 手动关闭客户端 */
public void close() {
try {
if (ObjectUtil.isNotNull(fs)) {
fs.close();
}
} catch (IOException e) {
log.error("hdfs close error: {}", Throwables.getStackTraceAsString(e));
}
}
/**
* 本地文件上传hdfs
*
* @param localFileStr
* @param dstFileStr
*/
public void copyFromLocalFile(String localFileStr, String dstFileStr) {
copyFromLocalFile(true, localFileStr, dstFileStr);
}
/**
* 本地文件上传hdfs
*
* @param override
* @param localFileStr
* @param dstFileStr
*/
public void copyFromLocalFile(Boolean override, String localFileStr, String dstFileStr) {
try {
fs.copyFromLocalFile(false, override, new Path(localFileStr), new Path(dstFileStr));
} catch (Exception e) {
log.error("hdfs copyFromLocalFile error: {}", Throwables.getStackTraceAsString(e));
}
}
/**
* 创建hdfs文件
*/
public FSDataOutputStream createFile2(String filename) throws Exception {
Path filePath = new Path(filename);
return fs.create(filePath, true);
}
/**
* 读取hdfs指定目录下文件列表
*
* @param path
* @param recursion
* @throws Exception
*/
public List<FileStatus> listFile2(Path path, boolean recursion) throws Exception {
List<FileStatus> files = new ArrayList<>();
log.info(path.toString());
FileStatus[] fileStatusList = fs.listStatus(path);
for (FileStatus fileStatus : fileStatusList) {
if (fileStatus.isDirectory()) {
if (recursion) {
files.addAll(listFile2(fileStatus.getPath(), true));
} else {
files.add(fileStatus);
}
} else {
files.add(fileStatus);
}
}
return files;
}
/**
* 判断文件是否存在
*
* @param fileName
* @return
* @throws Exception
*/
public boolean checkFileExist(String fileName) throws Exception {
Path path = new Path(fileName);
return fs.exists(path);
}
/**
* @param path
* @param recursion 是否包括子目录文件
* @return
* @throws Exception
*/
public List<String> listFile(Path path, boolean recursion) throws Exception {
List<FileStatus> fileStatusList = listFile2(path, recursion);
if (CollectionUtils.isEmpty(fileStatusList)) {
return Lists.newArrayList();
}
List<String> files = Lists.newArrayList();
for (FileStatus fileStatus : fileStatusList) {
if (fileStatus.isDirectory()) {
if (recursion) {
files.addAll(listFile(fileStatus.getPath(), recursion));
} else {
files.add(fileStatus.getPath().toString());
}
} else {
files.add(fileStatus.getPath().toString());
}
}
return files;
}
/**
* 获取path目录下 & 文件名包含containsKey & 最后生成的目录
*
* @param path
* @param containsKey
* @return
* @throws Exception
*/
public String getLastDir(Path path, String containsKey) throws Exception {
String file = "";
FileStatus[] fileStatusList = fs.listStatus(path);
long modTime = 0;
for (FileStatus fileStatus : fileStatusList) {
String tmpFile = fileStatus.getPath().toString();
if (fileStatus.getModificationTime() > modTime
&& tmpFile.contains(containsKey)
&& fileStatus.isDirectory()
&& listFile(fileStatus.getPath(), false).size() > 0
) {
file = tmpFile;
modTime = fileStatus.getModificationTime();
}
}
return file;
}
/**
* 在HDFS创建文件夹
*
* @param path
* @return
* @throws Exception
*/
public boolean mkdir(String path) {
boolean isOk = false;
if (StringUtils.isBlank(path)) {
return false;
}
try {
if (existFile(path)) {
log.info("hdfs file is exists: {}", path);
return true;
}
Path srcPath = new Path(path);
isOk = fs.mkdirs(srcPath);
log.info("hdfs mkdir success: {}", path);
} catch (Exception e) {
log.error("hdfs mkdir: {}, error: {}", path, Throwables.getStackTraceAsString(e));
}
return isOk;
}
/**
* 判断HDFS文件是否存在
*
* @param path
* @return true 存在 false 不存在
* @throws Exception
*/
public boolean existFile(String path) {
boolean isExists = false;
if (StringUtils.isEmpty(path)) {
return false;
}
try {
Path srcPath = new Path(path);
isExists = this.fs.exists(srcPath);
} catch (Exception e) {
log.error("existFile error: {}", Throwables.getStackTraceAsString(e));
}
return isExists;
}
/**
* HDFS远程文件复制
*
* @param sourcePath 资源文件path
* @param targetPath 目标文件path
* @throws Exception
*/
public void copyRemoteFile(String sourcePath, String targetPath) {
try {
if (StringUtils.isEmpty(sourcePath) || StringUtils.isEmpty(targetPath)) {
return;
}
// 1. 资源文件不存在直接返回
if (!existFile(sourcePath)) {
log.info("copyRemoteFile sourcePath {} is not exist", sourcePath);
return;
}
/* 2. copy remote*/
// source
Path oldPath = new Path(sourcePath);
// target
Path newPath = new Path(targetPath);
FileUtil.copy(fs, oldPath, fs, newPath, false, conf);
log.info("remote cope file success,sourcePath:{}, targetPath:{}", sourcePath, targetPath);
} catch (Exception e) {
log.error("hdfs copyRemoteFile error: {}", Throwables.getStackTraceAsString(e));
}
}
/**
* 删除HDFS文件 不知处删除目录
*
* @param path
* @throws Exception
* @returnhadoo
*/
public boolean deleteFile(String path) {
if (StringUtils.isEmpty(path)) {
return false;
}
boolean isOk = false;
try {
if (!existFile(path)) {
return false;
}
Path srcPath = new Path(path);
isOk = fs.deleteOnExit(srcPath);
} catch (Exception e) {
log.error("hdfs deleteFile {}", Throwables.getStackTraceAsString(e));
}
return isOk;
}
/**
* 删除hdf目录
*
* @param path
* @return
*/
public void deleteDir(String path) {
try {
if (!existFile(path)) {
return;
}
fs.delete(new Path(path), true);
} catch (IOException e) {
log.error("deleteDir error:{}", Throwables.getStackTraceAsString(e));
}
}
public static void main(String[] args) throws Exception {
DynamicHDFSClient hdfsClient = new DynamicHDFSClient("/config/conf");
//String sourcePath = "/user/flink/user-dependencies/depends-test.jar";
String sourcePath = "hdfs:///user/jaer/commons-cli-1.4.jar";
String targetPath = "/user/flink/tmp/test/commons-cli-1.4.jar";
System.out.println(sourcePath.substring(sourcePath.lastIndexOf("/") + 1));
//hdfsClient.copyRemoteFile(sourcePath, targetPath);
}
}
3 上传下载
// Upload: copy the local file C:\Users\dell\Desktop\SpringBoot.jpg to the /test HDFS directory
hdfsUtil.uploadFile("C:\\Users\\dell\\Desktop\\SpringBoot.jpg", "/test");
// Download
// arg 1: output stream (e.g. the servlet response); arg 2: remote HDFS file path
hdfsUtil.writeOutputStreamFile(response.getOutputStream(), "/test/SpringBoot.jpg");