1. Environment Setup
1.1 Linux Hadoop cluster setup: see the CSDN post Hadoop大数据集群搭建(超详细)_hadoop集群搭建-CSDN博客
1.2 Windows Hadoop utilities installation: see the CSDN post Hadoop——Windows系统下Hadoop单机环境搭建_hadoop windows开发环境搭建-CSDN博客
1.3 Note: calling HDFS through the Java client API requires a local Hadoop installation. If you need to work without one, use WebHDFS instead; see the Hadoop REST API – WebHDFS documentation for details.
This project is implemented with the Java client API.
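For reference, a minimal WebHDFS sketch is shown below. It assumes the NameNode host hadoop101, the default Hadoop 3.x NameNode HTTP port 9870, and simple authentication via the user.name query parameter; adjust these to your cluster.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class WebHdfsListDemo {
    public static void main(String[] args) throws Exception {
        // List /tmp through the WebHDFS REST API; no local Hadoop installation is needed.
        // Host, port and user.name are assumptions for illustration only.
        String url = "http://hadoop101:9870/webhdfs/v1/tmp?op=LISTSTATUS&user.name=user001";
        HttpRequest request = HttpRequest.newBuilder(URI.create(url)).GET().build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // The response body is a JSON FileStatuses object describing the directory entries
        System.out.println(response.statusCode());
        System.out.println(response.body());
    }
}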
2. Maven Configuration
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.3.9</version>
</dependency>
<!-- hadoop -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.2.4</version>
</dependency>
3. HDFS Java API Utility Class
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
import org.springframework.stereotype.Component;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
* @author maple
* @describe
* @createTime 2024/05/12
*/
@Component
@ConditionalOnBean(FileSystem.class)
public class HadoopTemplate {
private static final Logger log = LoggerFactory.getLogger(HadoopTemplate.class);
@Autowired
private FileSystem fileSystem;
public void uploadFile(String srcFile, String destPath) {
copyFileToHDFS(false, true, srcFile, destPath);
}
public void uploadFile(boolean del, String srcFile, String destPath) {
copyFileToHDFS(del, true, srcFile, destPath);
}
public void delDir(String path) {
rmdir(path, null);
}
public void download(String fileName, String savePath) {
getFile(fileName, savePath);
}
/**
* Check whether a directory exists, optionally creating it first.
*
* @param filePath directory path on HDFS
* @param create   whether to create the directory if it does not exist
* @return true if the path exists and is a directory
*/
public boolean existDir(String filePath, boolean create) throws IOException {
boolean flag = false;
if (StringUtils.isEmpty(filePath)) {
throw new IllegalArgumentException("filePath must not be empty");
}
Path path = new Path(filePath);
if (create) {
if (!fileSystem.exists(path)) {
fileSystem.mkdirs(path);
}
}
if (fileSystem.isDirectory(path)) {
flag = true;
}
return flag;
}
/**
* Check whether a file or directory exists at the given path.
*
* @param filePath path on HDFS
* @return true if the path exists
*/
public boolean existFile(String filePath) throws IOException {
if (StringUtils.isEmpty(filePath)) {
throw new IllegalArgumentException("filePath must not be empty");
}
Path path = new Path(filePath);
return fileSystem.exists(path);
}
/**
* Upload a file to HDFS.
*
* @param delSrc    whether to delete the local source file after upload (default false)
* @param overwrite whether to overwrite an existing file at the destination
* @param srcFile   source file path on the local file system
* @param destPath  destination path on HDFS
*/
public void copyFileToHDFS(boolean delSrc, boolean overwrite, String srcFile, String destPath) {
// The source path is on the local file system: a Linux path on Linux, or a Windows path such as D://hadoop/djt/weibo.txt when testing on Windows
Path srcPath = new Path(srcFile);
Path dstPath = new Path(destPath);
// Perform the upload
try {
fileSystem.copyFromLocalFile(delSrc, overwrite, srcPath, dstPath);
log.info("uploaded to {}", dstPath);
} catch (IOException e) {
log.error("copyFileToHDFS failed", e);
}
}
/**
* Delete a file or a directory.
*
* @param path     directory (or file) path on HDFS
* @param fileName optional file name appended to the path; may be blank
*/
public void rmdir(String path, String fileName) {
try {
if (StringUtils.isNotBlank(fileName)) {
path = path + "/" + fileName;
}
// Recursive delete; the single-argument delete(Path) is deprecated, so pass the recursive flag explicitly
fileSystem.delete(new Path(path), true);
} catch (IllegalArgumentException | IOException e) {
log.error("rmdir failed", e);
}
}
/**
* Download a file from HDFS.
*
* @param hdfsFile file path on HDFS
* @param destPath local path where the downloaded file is saved
*/
public void getFile(String hdfsFile, String destPath) {
Path hdfsPath = new Path(hdfsFile);
Path dstPath = new Path(destPath);
try {
// Copy the HDFS file to the local file system
fileSystem.copyToLocalFile(hdfsPath, dstPath);
} catch (IOException e) {
log.error("getFile failed", e);
}
}
public void writer(String destPath, InputStream in) {
try {
FSDataOutputStream out = fileSystem.create(new Path(destPath));
// copyBytes with a Configuration argument closes both streams when it finishes
IOUtils.copyBytes(in, out, fileSystem.getConf());
} catch (IOException e) {
log.error("writer failed", e);
}
}
public void open(String destPath, OutputStream out) {
FSDataInputStream in = null;
try {
in = fileSystem.open(new Path(destPath));
// Copy the file content once; the caller owns the output stream, so do not close it here
IOUtils.copyBytes(in, out, 4096, false);
} catch (IOException e) {
log.error("open failed", e);
} finally {
IOUtils.closeStream(in);
}
}
public String getFileExtension(String destPath) {
Path path = new Path(destPath);
FileStatus fileStatus;
try {
// Look up the status of the path
fileStatus = fileSystem.getFileStatus(path);
} catch (IOException e) {
log.error("getFileStatus failed for {}", destPath, e);
return "";
}
// Directory or regular file?
log.info("Is directory? {}", fileStatus.isDirectory());
log.info("Is file? {}", fileStatus.isFile());
// For a regular file, derive the extension from the file name
if (fileStatus.isFile()) {
String fileName = path.getName();
String fileExtension = fileName.substring(fileName.lastIndexOf('.') + 1);
log.info("File extension: {}", fileExtension);
return fileExtension;
}
return "";
}
public static String getContentType(String destPath) throws IOException {
Path hdfsPath = new Path(destPath);
// Get the file name
String fileName = hdfsPath.getName();
// Infer the content type from the file extension; this is only a minimal example
if (fileName.endsWith(".txt")) {
return "text/plain";
} else if (fileName.endsWith(".jpg")) {
return "image/jpeg";
} else if (fileName.endsWith(".png")) {
return "image/png";
} else {
// Fall back to a generic binary type
return "application/octet-stream";
}
}
// List all entries directly under the given path
public void getFileList(String path) throws IOException {
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
FileStatus[] fileStatuses = fileSystem.listStatus(new Path(path));
for (FileStatus fileStatus : fileStatuses) {
System.out.println(fileStatus.getPath().getName());
System.out.println(format.format(fileStatus.getModificationTime()));
if (fileStatus.isDirectory())
System.out.println("directory");
else
System.out.println("file");
}
// To list directories before files, collect the statuses into a list and sort it,
// e.g. with a comparator backed by Collator.getInstance(Locale.CHINA)
}
/**
* Compress an HDFS path (file or directory tree) into a zip written to the given streams.
*
* @param cloudPath HDFS path to compress
* @param zos       zip stream that receives the entries
* @param out       byte stream backing {@code zos}
* @return the byte stream passed in as {@code out}
* @author liudz
* @date 2020/6/8
**/
public OutputStream down(String cloudPath, ZipOutputStream zos, ByteArrayOutputStream out) {
try {
compress(cloudPath, zos, fileSystem);
} catch (IOException e) {
log.error("down failed", e);
}
return out;
}
/**
* Compress an HDFS path (file or directory tree) into an in-memory zip archive.
*
* @param cloudPath HDFS path to compress
* @return a ByteArrayOutputStream holding the zip archive
* @author liudz
* @date 2020/6/8
**/
public OutputStream down2(String cloudPath) {
ByteArrayOutputStream out = null;
try {
out = new ByteArrayOutputStream();
ZipOutputStream zos = new ZipOutputStream(out);
compress(cloudPath, zos, fileSystem);
zos.close();
} catch (IOException e) {
log.error("down2 failed", e);
}
return out;
}
/**
* Recursively add the contents of an HDFS directory to a zip stream.
*
* @param baseDir         HDFS directory to compress
* @param zipOutputStream zip stream that receives the entries
* @param fs              file system to read from
* @author liudz
* @date 2020/6/8
**/
public void compress(String baseDir, ZipOutputStream zipOutputStream, FileSystem fs) throws IOException {
try {
FileStatus[] fileStatusList = fs.listStatus(new Path(baseDir));
log.info("basedir = {}", baseDir);
String[] strs = baseDir.split("/");
// lastName is the final segment of the path
String lastName = strs[strs.length - 1];
for (int i = 0; i < fileStatusList.length; i++) {
String name = fileStatusList[i].getPath().toString();
name = name.substring(name.indexOf("/" + lastName));
if (fileStatusList[i].isFile()) {
Path path = fileStatusList[i].getPath();
FSDataInputStream inputStream = fs.open(path);
zipOutputStream.putNextEntry(new ZipEntry(name.substring(1)));
IOUtils.copyBytes(inputStream, zipOutputStream, 1024);
inputStream.close();
} else {
zipOutputStream.putNextEntry(new ZipEntry(fileStatusList[i].getPath().getName() + "/"));
log.info("descending into {}", fileStatusList[i].getPath());
compress(fileStatusList[i].getPath().toString(), zipOutputStream, fs);
}
}
} catch (IOException e) {
log.error("compress failed", e);
}
}
}
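A minimal usage sketch of the utility class, assuming it is picked up by component scanning and the FileSystem bean from section 4 is available; all paths below are illustrative:
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;

@Service
public class HdfsDemoService {
    @Autowired
    private HadoopTemplate hadoopTemplate;

    public void demo() throws IOException {
        // Upload a local file into an HDFS directory (paths are examples only)
        hadoopTemplate.uploadFile("D:/tmp/weibo.txt", "/user/user001/upload");
        // Check that it arrived, list the directory, then download a copy
        if (hadoopTemplate.existFile("/user/user001/upload/weibo.txt")) {
            hadoopTemplate.getFileList("/user/user001/upload");
            hadoopTemplate.download("/user/user001/upload/weibo.txt", "D:/tmp/weibo-copy.txt");
        }
    }
}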
4. Configuration
import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
* @author maple
* @describe
* @createTime 2024-05-01
*/
@Configuration
public class HadoopConfig {
private static final Logger log = LoggerFactory.getLogger(HadoopConfig.class);
@Value("${hadoop.user}")
private String user;
@Value("${hadoop.password}")
private String password;
@Value("${hdfs.hdfs-site}")
private String hdfsSite;
@Value("${hdfs.core-site}")
private String coreSite;
@Bean("fileSystem")
public FileSystem createFs() throws Exception {
System.setProperty("HADOOP_USER_NAME", user);
System.setProperty("HADOOP_USER_PASSWORD", password);
// Build the Hadoop client configuration
org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
// Alternatively, load the cluster's own XML files from the classpath:
// conf.addResource(coreSite);
// conf.addResource(hdfsSite);
conf.set("fs.defaultFS", hdfsSite);
conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
log.info("===============【hadoop configuration info start.】===============");
log.info("【hadoop conf】: size:{}, {}", conf.size(), conf.toString());
log.info("【fs.defaultFS】: {}", conf.get("fs.defaultFS"));
log.info("【fs.hdfs.impl】: {}", conf.get("fs.hdfs.impl"));
FileSystem fs = FileSystem.newInstance(conf);
log.info("【fileSystem scheme】: {}", fs.getScheme());
log.info("===============【hadoop configuration info end.】===============");
return fs;
}
}
hadoop:
  user: user001
  password: ******
hdfs:
  hdfs-site: hdfs://hadoop101:9000
  core-site:
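If you would rather drive the client from the cluster's own configuration files, as the commented-out addResource calls above hint, a sketch along these lines works; the resource names are assumptions and the files must be on the application classpath:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class FsFromClusterConfig {
    public static FileSystem create() throws Exception {
        Configuration conf = new Configuration();
        // core-site.xml / hdfs-site.xml copied from the cluster's etc/hadoop directory
        // and placed on the classpath; the names here are illustrative
        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        // fs.defaultFS then comes from core-site.xml instead of application.yml
        return FileSystem.newInstance(conf);
    }
}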
5. Usage Examples
File upload
/**
* Generic upload endpoint (single file)
*/
@PostMapping("/upload/{parentId}")
@Transactional
public AjaxResult uploadFile(MultipartFile file,@PathVariable Long parentId) throws Exception
{
try
{
String extension = FileUploadUtils.getExtension(file);
// Base upload path on the local file system
String filePath = RuoYiConfig.getProfile();
// Storage directory belonging to the current user
DiskStorage diskStorage = diskStorageService.selectDiskStorageByUserId(SecurityUtils.getUserId());
if (Objects.isNull(diskStorage)) throw new ServiceException("Storage space has not been initialized");
if (diskStorage.getTotalCapacity()-diskStorage.getUsedCapacity()<=0) throw new ServiceException("Insufficient storage space");
if (parentId.equals(0L)) {
filePath = filePath+"/"+diskStorage.getBaseDir();
} else {
DiskFile parentIdFile = diskFileService.selectDiskFileById(parentId);
if (Objects.isNull(parentIdFile)) throw new ServiceException("Parent folder does not exist");
filePath = filePath+
StringUtils.substringAfter(parentIdFile.getUrl(), Constants.HADOOP_PREFIX)
.replace("--","/");
}
diskSensitiveWordService.filterSensitiveWord(file.getOriginalFilename());
DiskFile diskFile = new DiskFile();
String fileName = RandomUtil.randomString(4)+"_"+file.getOriginalFilename();
diskFile.setName(fileName);
// Upload to the local staging directory and get the new file name back
fileName = FileUploadUtils.upload(filePath,false, file,fileName);
// Relative path of the file used on HDFS
String descPath = StringUtils.substringAfter(fileName, Constants.RESOURCE_PREFIX);
// Copy the staged local file to HDFS (delSrc=true removes the local copy)
hadoopTemplate.copyFileToHDFS(true,true,RuoYiConfig.getProfile()+ StringUtils.substringAfter(fileName, Constants.RESOURCE_PREFIX), descPath);
String url = serverConfig.getUrl() + Constants.HADOOP_PREFIX + descPath.replace("/","--");
diskFile.setCreateId(getUserId());
diskFile.setOldName(file.getOriginalFilename());
diskFile.setIsDir(0);
diskFile.setOrderNum(0);
diskFile.setParentId(parentId);
diskFile.setUrl(url.replace(serverConfig.getUrl(),""));
diskFile.setSize(file.getSize());
diskFile.setType(diskFileService.getType(extension));
diskFileService.save(diskFile,diskStorage);
AjaxResult ajax = AjaxResult.success();
ajax.put("url", url);
ajax.put("fileName", url.replace(serverConfig.getUrl(),""));
ajax.put("newFileName", FileUtils.getName(fileName));
ajax.put("originalFilename", file.getOriginalFilename());
ajax.put("size", file.getSize());
ajax.put("type", extension);
return ajax;
}
catch (Exception e)
{
return AjaxResult.error(e.getMessage());
}
}
File download
/**
* Download files from HDFS as a zip archive
*/
@GetMapping("/download/zip")
public void hadoopDownload(DownloadBo downloadBo, HttpServletResponse response) {
List<DiskFile> diskFiles;
String dest = RuoYiConfig.getProfile()+"/";
if (StringUtils.isNotEmpty(downloadBo.getUuid())&&StringUtils.isNotEmpty(downloadBo.getSecretKey())) {
diskFiles = diskFileService.selectDiskFileListByIds(Arrays.stream(downloadBo.getIds().split(","))
.map(String::trim)
.map(Long::valueOf)
.toArray(Long[]::new));
dest = dest + downloadBo.getUuid();
} else {
diskFiles = diskFileService.selectDiskFileListByIds(Arrays.stream(downloadBo.getIds().split(","))
.map(String::trim)
.map(Long::valueOf)
.toArray(Long[]::new),getUserId());
dest = dest + RandomUtil.randomString(6);
}
String downloadPath = dest + ".zip";
try {
ByteArrayOutputStream out = null;
try {
out = new ByteArrayOutputStream();
ZipOutputStream zos = new ZipOutputStream(out);
for (int i = 0; i < diskFiles.size(); i++) {
String path = StringUtils.substringAfter(diskFiles.get(i).getUrl(),Constants.HADOOP_PREFIX);
// Convert the stored url back into an HDFS path
path = path.replace("--","/");
// Stream the remote file (or directory) into the zip
hadoopTemplate.down(path,zos,out);
}
zos.close();
} catch (Exception e) {
log.error("diskFile: failed to read files from HDFS into the zip archive", e);
}
// The zip archive is now fully buffered in memory
byte[] data = out.toByteArray();
out.close();
response.reset();
response.addHeader("Access-Control-Allow-Origin", "*");
response.addHeader("Access-Control-Expose-Headers", "Content-Disposition");
response.setHeader("Content-Disposition", "attachment; filename=\"ruoyi.zip\"");
response.addHeader("Content-Length", "" + data.length);
response.setContentType("application/octet-stream; charset=UTF-8");
IOUtils.write(data, response.getOutputStream());
} catch (IOException e) {
log.error("diskFile 下载文件失败", e);
} finally {
FileUtils.deleteFile(downloadPath);
}
}
File preview
import com.ruoyi.disk.HadoopTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import java.io.ByteArrayOutputStream;
@Controller
@RequestMapping("/hadoop")
public class HadoopController {
@Autowired
private HadoopTemplate hadoopTemplate;
@Value("${hdfs.hdfs-site}")
private String hdfsSite;
@GetMapping("/{descPath}")
public ResponseEntity<ByteArrayResource> preview(@PathVariable("descPath") String descPath) {
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
hadoopTemplate.open(descPath.replace("--", "/"), outputStream);
String fileExtension = hadoopTemplate.getFileExtension(descPath.replace("--", "/"));
byte[] byteArray = outputStream.toByteArray();
// Wrap the bytes in a resource
ByteArrayResource resource = new ByteArrayResource(byteArray);
// Set the response headers based on the file extension
HttpHeaders headers = new HttpHeaders();
switch (fileExtension) {
case "png":
headers.setContentType(MediaType.IMAGE_PNG);
break;
case "gif":
headers.setContentType(MediaType.IMAGE_GIF);
break;
case "jpeg":
headers.setContentType(MediaType.IMAGE_JPEG);
break;
default:
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
break;
}
// Return the resource as the response body
return ResponseEntity.ok()
.headers(headers)
.contentLength(byteArray.length)
.body(resource);
}
}
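Since the controller encodes '/' in the HDFS path as '--' in the URL, a preview request from a client looks roughly like the sketch below; the host, port and file path are illustrative:
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class PreviewClientDemo {
    public static void main(String[] args) throws Exception {
        // /user/user001/upload/demo.png on HDFS becomes --user--user001--upload--demo.png in the URL
        String url = "http://localhost:8080/hadoop/--user--user001--upload--demo.png";
        HttpResponse<byte[]> response = HttpClient.newHttpClient()
                .send(HttpRequest.newBuilder(URI.create(url)).GET().build(),
                      HttpResponse.BodyHandlers.ofByteArray());
        System.out.println("Content-Type: " + response.headers().firstValue("Content-Type").orElse(""));
        System.out.println("Body length: " + response.body().length);
    }
}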