记录springboot操作hdfs过程中遇到的一些问题。
一、pom文件引入依赖:
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.3.6</version>
</dependency>
二、编写application.yml、config、service、controller。
demo代码主要参考地址:https://kgithub.com/WinterChenS/springboot-learning-experience/blob/master/spring-boot-hadoop
hdfs:
hdfsPath: hdfs://192.xxx.xxx.xx:8888
hdfsName: xxx
package com.guo.self.dubai.hadoop;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
* @author:guoq
* @date:2023/7/28
* @descripion:
*/
@Slf4j
@Configuration
public class HadoopConfig {
@Value("${hdfs.hdfsPath}")
private String hdfsPath;
@Value("${hdfs.hdfsName}")
private String hdfsName;
@Bean
public org.apache.hadoop.conf.Configuration getConfiguration(){
org.apache.hadoop.conf.Configuration config = new org.apache.hadoop.conf.Configuration();
config.set("fs.defaultFS",hdfsPath);
return config;
}
@Bean
public FileSystem getFileSystem(){
FileSystem fileSystem=null;
try {
fileSystem= FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}
return fileSystem;
}
}
package com.guo.self.dubai.hadoop.hdfs;
import org.apache.hadoop.fs.BlockLocation;
import org.springframework.web.multipart.MultipartFile;
import java.util.List;
import java.util.Map;
/**
* @author:guoq
* @date:2023/7/31
* @descripion:
*/
public interface HDFSService {
    /** Returns true if the file or directory at {@code path} exists on HDFS. */
    boolean existFile(String path);
    /** Lists a directory; each map carries the entry's path and its FileStatus. */
    List<Map<String, Object>> readCatalog(String path);
    /** Reads the text content of the file at {@code path}. */
    String readFileContent(String path);
    /** Creates a file on HDFS from an upload (FileSystem.create + output-stream write). */
    boolean createFile(String path, MultipartFile file);
    /** Uploads an existing local file to HDFS; internally also create + write. */
    boolean uploadFile(String path, String uploadPath);
    /** Downloads an existing HDFS file to the local filesystem. */
    boolean downloadFile(String path, String downloadPath);
    /** Deletes the file or directory at {@code path}. */
    boolean deleteFile(String path);
    /** Returns the HDFS block locations of the file at {@code path}. */
    BlockLocation[] getFileBlockLocations(String path);
}
package com.guo.self.dubai.hadoop.hdfs;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.fs.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;
import org.springframework.web.multipart.MultipartFile;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
* @author:guoq
* @date:2023/7/31
* @descripion:
*/
@Service
@Slf4j
public class HDFSServiceImpl implements HDFSService{
@Autowired
private FileSystem fileSystem;
@Override
public boolean existFile(String path) {
if (StringUtils.isEmpty(path)){
return false;
}
Path src = new Path(path);
try {
return fileSystem.exists(src);
} catch (IOException e) {
log.error(e.getMessage());
}
return false;
}
@Override
public List<Map<String, Object>> readCatalog(String path) {
if (StringUtils.isEmpty(path)){
return Collections.emptyList();
}
if (!existFile(path)){
log.error("catalog is not exist!!");
return Collections.emptyList();
}
Path src = new Path(path);
FileStatus[] fileStatuses = null;
try {
fileStatuses = fileSystem.listStatus(src);
} catch (IOException e) {
log.error(e.getMessage());
}
List<Map<String, Object>> result = new ArrayList<>(fileStatuses.length);
if (null != fileStatuses && 0 < fileStatuses.length) {
for (FileStatus fileStatus : fileStatuses) {
Map<String, Object> cataLogMap = new HashMap<>();
cataLogMap.put("filePath", fileStatus.getPath());
cataLogMap.put("fileStatus", fileStatus);
result.add(cataLogMap);
}
}
return result;
}
@Override
public boolean createFile(String path, MultipartFile file) {
boolean target = false;
if (StringUtils.isEmpty(path)) {
return false;
}
String fileName = file.getOriginalFilename();
// String fileName = file.getName();
Path newPath = new Path(path + "/" + fileName);
FSDataOutputStream outputStream = null;
try {
outputStream = fileSystem.create(newPath);
outputStream.write(file.getBytes());
target = true;
} catch (IOException e) {
log.error(e.getMessage());
} finally {
if (null != outputStream) {
try {
outputStream.close();
} catch (IOException e) {
log.error(e.getMessage());
}
}
}
return target;
}
@Override
public boolean uploadFile(String path, String uploadPath) {
if (StringUtils.isEmpty(path) || StringUtils.isEmpty(uploadPath)) {
return false;
}
Path clientPath = new Path(path);
Path serverPath = new Path(uploadPath);
try {
fileSystem.copyFromLocalFile(false,clientPath,serverPath);
return true;
} catch (IOException e) {
log.error(e.getMessage(), e);
}
return false;
}
@Override
public boolean downloadFile(String path, String downloadPath) {
if (StringUtils.isEmpty(path) || StringUtils.isEmpty(downloadPath)) {
return false;
}
Path clienPath = new Path(path);
Path targetPath = new Path(downloadPath);
try {
fileSystem.copyToLocalFile(false,clienPath, targetPath);
return true;
} catch (IOException e) {
log.error(e.getMessage());
}
return false;
}
@Override
public boolean deleteFile(String path) {
boolean target = false;
if (StringUtils.isEmpty(path)) {
return false;
}
if (!existFile(path)) {
return false;
}
Path src = new Path(path);
try {
target = fileSystem.deleteOnExit(src);
} catch (IOException e) {
log.error(e.getMessage());
}
return target;
}
@Override
public BlockLocation[] getFileBlockLocations(String path) {
if (StringUtils.isEmpty(path)) {
return null;
}
if (!existFile(path)) {
return null;
}
BlockLocation[] blocks = null;
Path src = new Path(path);
try{
FileStatus fileStatus = fileSystem.getFileStatus(src);
blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
}catch(Exception e){
log.error(e.getMessage());
}
return blocks;
}
@Override
public String readFileContent(String path) {
if (StringUtils.isEmpty(path)){
return null;
}
if (!existFile(path)) {
return null;
}
Path src = new Path(path);
FSDataInputStream inputStream = null;
StringBuilder sb = new StringBuilder();
try {
inputStream = fileSystem.open(src);
String lineText = "";
while ((lineText = inputStream.readLine()) != null) {
sb.append(lineText);
}
} catch (IOException e) {
log.error(e.getMessage());
} finally {
if (null != inputStream) {
try {
inputStream.close();
} catch (IOException e) {
log.error(e.getMessage());
}
}
}
return sb.toString();
}
}
package com.guo.self.dubai.hadoop;
import com.guo.self.dubai.hadoop.mapreduce.WordCountDriver;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author:guoq
* @date:2023/7/28
* @descripion:
*/
@Slf4j
@Configuration
public class HadoopConfig {
@Value("${hdfs.hdfsPath}")
private String hdfsPath;
@Value("${hdfs.hdfsName}")
private String hdfsName;
@Bean
public org.apache.hadoop.conf.Configuration getConfiguration(){
org.apache.hadoop.conf.Configuration config = new org.apache.hadoop.conf.Configuration();
config.set("fs.defaultFS",hdfsPath);
return config;
}
@Bean
public FileSystem getFileSystem(){
FileSystem fileSystem=null;
try {
fileSystem= FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}
return fileSystem;
}
}
三、启动报错:java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset
- 把centos7上安装的hadoop.xxx.tar.gz解压到windows本地
- 下载对应版本的winutils.exe和hadoop.dll,(下载地址:文件 · master · mirrors / cdarlint / winutils · GitCode),并复制到第一步步骤解压的bin目录下
- 配置本地环境变量
- 找到第一步解压的文件,找到hadoop.xxxx/ 与bin目录同级的etc文件,进入里层文件夹,编辑hadoop-env.cmd,找到set JAVA_HOME=%JAVA_HOME%,替换成你本地的jdk路径。注意:如果路径中含有空格(如 Program Files),该设置不会生效;可以新建一个目录链接指向目标文件夹,再把链接目录配到环境变量中,命令:mklink /J D:\ProgramFiles "D:\Program Files"
- 执行hadoop version即可
四、启动报错:java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
参考文章:解决Hadoop在本地(windows)操作报错:Could not locate executable null\bin\winutils.exe in the Hadoop binaries._hadoop windows 解压失败_D奋斗的小菜鸟!的博客-CSDN博客,配置了CLASSPATH即可
五:正常编写代码
六:效果,能成功调用