1. Environment Setup
1.1 Linux Hadoop cluster setup: see the CSDN post Hadoop大数据集群搭建(超详细)_hadoop集群搭建-CSDN博客
1.2 Windows Hadoop utilities installation: see the CSDN post Hadoop——Windows系统下Hadoop单机环境搭建_hadoop windows开发环境搭建-CSDN博客
1.3 Note: calling HDFS through the Java client API requires a local Hadoop installation. If you need to work without one, use WebHDFS instead; see the Hadoop REST API – WebHDFS documentation for details.
This project is implemented with the Java client API.
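For reference, a minimal WebHDFS sketch is shown below. It assumes the NameNode host hadoop101, the default Hadoop 3.x NameNode HTTP port 9870, and simple authentication via the user.name query parameter; adjust these to your cluster.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class WebHdfsListDemo {
    public static void main(String[] args) throws Exception {
        // List /tmp through the WebHDFS REST API; no local Hadoop installation is needed.
        // Host, port and user.name are assumptions for illustration only.
        String url = "http://hadoop101:9870/webhdfs/v1/tmp?op=LISTSTATUS&user.name=user001";
        HttpRequest request = HttpRequest.newBuilder(URI.create(url)).GET().build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // The response body is a JSON FileStatuses object describing the directory entries
        System.out.println(response.statusCode());
        System.out.println(response.body());
    }
}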
2. Maven Configuration
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.3.9</version>
</dependency>
<!-- hadoop -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.2.4</version>
</dependency>
3. HDFS Java API Utility Class
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
import org.springframework.stereotype.Component;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
* @author maple
* @describe
* @createTime 2024/05/12
*/
@Component
@ConditionalOnBean(FileSystem.class)
public class HadoopTemplate {
private static final Logger log = LoggerFactory.getLogger(HadoopTemplate.class);
@Autowired
private FileSystem fileSystem;
public void uploadFile(String srcFile, String destPath) {
copyFileToHDFS(false, true, srcFile, destPath);
}
public void uploadFile(boolean del, String srcFile, String destPath) {
copyFileToHDFS(del, true, srcFile, destPath);
}
public void delDir(String path) {
rmdir(path, null);
}
public void download(String fileName, String savePath) {
getFile(fileName, savePath);
}
/**
* Check whether a directory exists, optionally creating it first.
*
* @param filePath directory path on HDFS
* @param create   whether to create the directory if it does not exist
* @return true if the path exists and is a directory
*/
public boolean existDir(String filePath, boolean create) throws IOException {
boolean flag = false;
if (StringUtils.isEmpty(filePath)) {
throw new IllegalArgumentException("filePath must not be empty");
}
Path path = new Path(filePath);
if (create) {
if (!fileSystem.exists(path)) {
fileSystem.mkdirs(path);
}
}
if (fileSystem.isDirectory(path)) {
flag = true;
}
return flag;
}
/**
* Check whether a file or directory exists at the given path.
*
* @param filePath path on HDFS
* @return true if the path exists
*/
public boolean existFile(String filePath) throws IOException {
if (StringUtils.isEmpty(filePath)) {
throw new IllegalArgumentException("filePath must not be empty");
}
Path path = new Path(filePath);
return fileSystem.exists(path);
}
/**
* Upload a file to HDFS.
*
* @param delSrc    whether to delete the local source file after upload (default false)
* @param overwrite whether to overwrite an existing file at the destination
* @param srcFile   source file path on the local file system
* @param destPath  destination path on HDFS
*/
public void copyFileToHDFS(boolean delSrc, boolean overwrite, String srcFile, String destPath) {
// The source path is on the local file system: a Linux path on Linux, or a Windows path such as D://hadoop/djt/weibo.txt when testing on Windows
Path srcPath = new Path(srcFile);
Path dstPath = new Path(destPath);
// Perform the upload
try {
fileSystem.copyFromLocalFile(delSrc, overwrite, srcPath, dstPath);
log.info("uploaded to {}", dstPath);
} catch (IOException e) {
log.error("copyFileToHDFS failed", e);
}
}
/**
* Delete a file or a directory.
*
* @param path     directory (or file) path on HDFS
* @param fileName optional file name appended to the path; may be blank
*/
public void rmdir(String path, String fileName) {
try {
if (StringUtils.isNotBlank(fileName)) {
path = path + "/" + fileName;
}
// Recursive delete; the single-argument delete(Path) is deprecated, so pass the recursive flag explicitly
fileSystem.delete(new Path(path), true);
} catch (IllegalArgumentException | IOException e) {
log.error("rmdir failed", e);
}
}
/**
* Download a file from HDFS.
*
* @param hdfsFile file path on HDFS
* @param destPath local path where the downloaded file is saved
*/
public void getFile(String hdfsFile, String destPath) {
Path hdfsPath = new Path(hdfsFile);
Path dstPath = new Path(destPath);
try {
// Copy the HDFS file to the local file system
fileSystem.copyToLocalFile(hdfsPath, dstPath);
} catch (IOException e) {
log.error("getFile failed", e);
}
}
public void writer(String destPath, InputStream in) {
try {
FSDataOutputStream out = fileSystem.create(new Path(destPath));
// copyBytes with a Configuration argument closes both streams when it finishes
IOUtils.copyBytes(in, out, fileSystem.getConf());
} catch (IOException e) {
log.error("writer failed", e);
}
}
public void open(String destPath, OutputStream out) {
FSDataInputStream in = null;
try {
in = fileSystem.open(new Path(destPath));
// Copy the file content once; the caller owns the output stream, so do not close it here
IOUtils.copyBytes(in, out, 4096, false);
} catch (IOException e) {
log.error("open failed", e);
} finally {
IOUtils.closeStream(in);
}
}
public String getFileExtension(String destPath) {
Path path = new Path(destPath);
FileStatus fileStatus;
try {
// Look up the status of the path
fileStatus = fileSystem.getFileStatus(path);
} catch (IOException e) {
log.error("getFileStatus failed for {}", destPath, e);
return "";
}
// Directory or regular file?
log.info("Is directory? {}", fileStatus.isDirectory());
log.info("Is file? {}", fileStatus.isFile());
// For a regular file, derive the extension from the file name
if (fileStatus.isFile()) {
String fileName = path.getName();
String fileExtension = fileName.substring(fileName.lastIndexOf('.') + 1);
log.info("File extension: {}", fileExtension);
return fileExtension;
}
return "";
}
public static String getContentType(String destPath) throws IOException {
Path hdfsPath = new Path(destPath);
// Get the file name
String fileName = hdfsPath.getName();
// Infer the content type from the file extension; this is only a minimal example
if (fileName.endsWith(".txt")) {
return "text/plain";
} else if (fileName.endsWith(".jpg")) {
return "image/jpeg";
} else if (fileName.endsWith(".png")) {
return "image/png";
} else {
// Fall back to a generic binary type
return "application/octet-stream";
}
}
// List all entries directly under the given path
public void getFileList(String path) throws IOException {
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
FileStatus[] fileStatuses = fileSystem.listStatus(new Path(path));
for (FileStatus fileStatus : fileStatuses) {
System.out.println(fileStatus.getPath().getName());
System.out.println(format.format(fileStatus.getModificationTime()));
if (fileStatus.isDirectory())
System.out.println("directory");
else
System.out.println("file");
}
// To list directories before files, collect the statuses into a list and sort it,
// e.g. with a comparator backed by Collator.getInstance(Locale.CHINA)
}
/**
* Compress an HDFS path (file or directory tree) into a zip written to the given streams.
*
* @param cloudPath HDFS path to compress
* @param zos       zip stream that receives the entries
* @param out       byte stream backing {@code zos}
* @return the byte stream passed in as {@code out}
* @author liudz
* @date 2020/6/8
**/
public OutputStream down(String cloudPath, ZipOutputStream zos, ByteArrayOutputStream out) {
try {
compress(cloudPath, zos, fileSystem);
} catch (IOException e) {
log.error("down failed", e);
}
return out;
}
/**
* Compress an HDFS path (file or directory tree) into an in-memory zip archive.
*
* @param cloudPath HDFS path to compress
* @return a ByteArrayOutputStream holding the zip archive
* @author liudz
* @date 2020/6/8
**/
public OutputStream down2(String cloudPath) {
ByteArrayOutputStream out = null;
try {
out = new ByteArrayOutputStream();
ZipOutputStream zos = new ZipOutputStream(out);
compress(cloudPath, zos, fileSystem);
zos.close();
} catch (IOException e) {
log.error("down2 failed", e);
}
return out;
}
/**
* Recursively add the contents of an HDFS directory to a zip stream.
*
* @param baseDir         HDFS directory to compress
* @param zipOutputStream zip stream that receives the entries
* @param fs              file system to read from
* @author liudz
* @date 2020/6/8
**/
public void compress(String baseDir, ZipOutputStream zipOutputStream, FileSystem fs) throws IOException {
try {
FileStatus[] fileStatusList = fs.listStatus(new Path(baseDir));
log.info("basedir = {}", baseDir);
String[] strs = baseDir.split("/");
// lastName is the final segment of the path
String lastName = strs[strs.length - 1];
for (int i = 0; i < fileStatusList.length; i++) {
String name = fileStatusList[i].getPath().toString();
name = name.substring(name.indexOf("/" + lastName));
if (fileStatusList[i].isFile()) {
Path path = fileStatusList[i].getPath();
FSDataInputStream inputStream = fs.open(path);
zipOutputStream.putNextEntry(new ZipEntry(name.substring(1)));
IOUtils.copyBytes(inputStream, zipOutputStream, 1024);
inputStream.close();
} else {
zipOutputStream.putNextEntry(new ZipEntry(fileStatusList[i].getPath().getName() + "/"));
log.info("descending into {}", fileStatusList[i].getPath());
compress(fileStatusList[i].getPath().toString(), zipOutputStream, fs);
}
}
} catch (IOException e) {
log.error("compress failed", e);
}
}
}
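A minimal usage sketch of the utility class, assuming it is picked up by component scanning and the FileSystem bean from section 4 is available; all paths below are illustrative:
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;

@Service
public class HdfsDemoService {
    @Autowired
    private HadoopTemplate hadoopTemplate;

    public void demo() throws IOException {
        // Upload a local file into an HDFS directory (paths are examples only)
        hadoopTemplate.uploadFile("D:/tmp/weibo.txt", "/user/user001/upload");
        // Check that it arrived, list the directory, then download a copy
        if (hadoopTemplate.existFile("/user/user001/upload/weibo.txt")) {
            hadoopTemplate.getFileList("/user/user001/upload");
            hadoopTemplate.download("/user/user001/upload/weibo.txt", "D:/tmp/weibo-copy.txt");
        }
    }
}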
4. Configuration
import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
* @author maple
* @describe
* @createTime 2024-05-01
*/
@Configuration
public class HadoopConfig {
private static final Logger log = LoggerFactory.getLogger(HadoopConfig.class);
@Value("${hadoop.user}")
private String user;
@Value("${hadoop.password}")
private String password;
@Value("${hdfs.hdfs-site}")
private String hdfsSite;
@Value("${hdfs.core-site}")
private String coreSite;
@Bean("fileSystem")
public FileSystem createFs() throws Exception {
System.setProperty("HADOOP_USER_NAME", user);
System.setProperty("HADOOP_USER_PASSWORD", password);
// Build the Hadoop client configuration
org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
// Alternatively, load the cluster's own XML files from the classpath:
// conf.addResource(coreSite);
// conf.addResource(hdfsSite);
conf.set("fs.defaultFS", hdfsSite);
conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
log.info("===============【hadoop configuration info start.】===============");
log.info("【hadoop conf】: size:{}, {}", conf.size(), conf.toString());
log.info("【fs.defaultFS】: {}", conf.get("fs.defaultFS"));
log.info("【fs.hdfs.impl】: {}", conf.get("fs.hdfs.impl"));
FileSystem fs = FileSystem.newInstance(conf);
log.info("【fileSystem scheme】: {}", fs.getScheme());
log.info("===============【hadoop configuration info end.】===============");
return fs;
}
}
hadoop:
  user: user001
  password: ******
hdfs:
  hdfs-site: hdfs://hadoop101:9000
  core-site:
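If you would rather drive the client from the cluster's own configuration files, as the commented-out addResource calls above hint, a sketch along these lines works; the resource names are assumptions and the files must be on the application classpath:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class FsFromClusterConfig {
    public static FileSystem create() throws Exception {
        Configuration conf = new Configuration();
        // core-site.xml / hdfs-site.xml copied from the cluster's etc/hadoop directory
        // and placed on the classpath; the names here are illustrative
        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        // fs.defaultFS then comes from core-site.xml instead of application.yml
        return FileSystem.newInstance(conf);
    }
}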
5. Usage Examples
File upload
/**
* Generic upload endpoint (single file)
*/
@PostMapping("/upload/{parentId}")
@Transactional
public AjaxResult uploadFile(MultipartFile file,@PathVariable Long parentId) throws Exception
{
try
{
String extension = FileUploadUtils.getExtension(file);
// Base upload path on the local file system
String filePath = RuoYiConfig.getProfile();
// Storage directory belonging to the current user
DiskStorage diskStorage = diskStorageService.selectDiskStorageByUserId(SecurityUtils.getUserId());
if (Objects.isNull(diskStorage)) throw new ServiceException("Storage space has not been initialized");
if (diskStorage.getTotalCapacity()-diskStorage.getUsedCapacity()<=0) throw new ServiceException("Insufficient storage space");
if (parentId.equals(0L)) {
filePath = filePath+"/"+diskStorage.getBaseDir();
} else {
DiskFile parentIdFile = diskFileService.selectDiskFileById(parentId);
if (Objects.isNull(parentIdFile)) throw new ServiceException("Parent folder does not exist");
filePath = filePath+
StringUtils.substringAfter(parentIdFile.getUrl(), Constants.HADOOP_PREFIX)
.replace("--","/");
}
diskSensitiveWordService.filterSensitiveWord(file.getOriginalFilename());
DiskFile diskFile = new DiskFile();
String fileName = RandomUtil.randomString(4)+"_"+file.getOriginalFilename();
diskFile.setName(fileName);
// Upload to the local staging directory and get the new file name back
fileName = FileUploadUtils.upload(filePath,false, file,fileName);
// Relative path of the file used on HDFS
String descPath = StringUtils.substringAfter(fileName, Constants.RESOURCE_PREFIX);
// Copy the staged local file to HDFS (delSrc=true removes the local copy)
hadoopTemplate.copyFileToHDFS(true,true,RuoYiConfig.getProfile()+ StringUtils.substringAfter(fileName, Constants.RESOURCE_PREFIX), descPath);
String url = serverConfig.getUrl() + Constants.HADOOP_PREFIX + descPath.replace("/","--");
diskFile.setCreateId(getUserId());
diskFile.setOldName(file.getOriginalFilename());
diskFile.setIsDir(0);
diskFile.setOrderNum(0);
diskFile.setParentId(parentId);
diskFile.setUrl(url.replace(serverConfig.getUrl(),""));
diskFile.setSize(file.getSize());
diskFile.setType(diskFileService.getType(extension));
diskFileService.save(diskFile,diskStorage);
AjaxResult ajax = AjaxResult.success();
ajax.put("url", url);
ajax.put("fileName", url.replace(serverConfig.getUrl(),""));
ajax.put("newFileName", FileUtils.getName(fileName));
ajax.put("originalFilename", file.getOriginalFilename());
ajax.put("size", file.getSize());
ajax.put("type", extension);
return ajax;
}
catch (Exception e)
{
return AjaxResult.error(e.getMessage());
}
}
File download
/**
* Download files from HDFS as a zip archive
*/
@GetMapping("/download/zip")
public void hadoopDownload(DownloadBo downloadBo, HttpServletResponse response) {
List<DiskFile> diskFiles;
String dest = RuoYiConfig.getProfile()+"/";
if (StringUtils.isNotEmpty(downloadBo.getUuid())&&StringUtils.isNotEmpty(downloadBo.getSecretKey())) {
diskFiles = diskFileService.selectDiskFileListByIds(Arrays.stream(downloadBo.getIds().split(","))
.map(String::trim)
.map(Long::valueOf)
.toArray(Long[]::new));
dest = dest + downloadBo.getUuid();
} else {
diskFiles = diskFileService.selectDiskFileListByIds(Arrays.stream(downloadBo.getIds().split(","))
.map(String::trim)
.map(Long::valueOf)
.toArray(Long[]::new),getUserId());
dest = dest + RandomUtil.randomString(6);
}
String downloadPath = dest + ".zip";
try {
ByteArrayOutputStream out = null;
try {
out = new ByteArrayOutputStream();
ZipOutputStream zos = new ZipOutputStream(out);
for (int i = 0; i < diskFiles.size(); i++) {
String path = StringUtils.substringAfter(diskFiles.get(i).getUrl(),Constants.HADOOP_PREFIX);
// Convert the stored url back into an HDFS path
path = path.replace("--","/");
// Stream the remote file (or directory) into the zip
hadoopTemplate.down(path,zos,out);
}
zos.close();
} catch (Exception e) {
log.error("diskFile: failed to read files from HDFS into the zip archive", e);
}
// The zip archive is now fully buffered in memory
byte[] data = out.toByteArray();
out.close();
response.reset();
response.addHeader("Access-Control-Allow-Origin", "*");
response.addHeader("Access-Control-Expose-Headers", "Content-Disposition");
response.setHeader("Content-Disposition", "attachment; filename=\"ruoyi.zip\"");
response.addHeader("Content-Length", "" + data.length);
response.setContentType("application/octet-stream; charset=UTF-8");
IOUtils.write(data, response.getOutputStream());
} catch (IOException e) {
log.error("diskFile 下载文件失败", e);
} finally {
FileUtils.deleteFile(downloadPath);
}
}
File preview
import com.ruoyi.disk.HadoopTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import java.io.ByteArrayOutputStream;
@Controller
@RequestMapping("/hadoop")
public class HadoopController {
@Autowired
private HadoopTemplate hadoopTemplate;
@Value("${hdfs.hdfs-site}")
private String hdfsSite;
@GetMapping("/{descPath}")
public ResponseEntity<ByteArrayResource> preview(@PathVariable("descPath") String descPath) {
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
hadoopTemplate.open(descPath.replace("--", "/"), outputStream);
String fileExtension = hadoopTemplate.getFileExtension(descPath.replace("--", "/"));
byte[] byteArray = outputStream.toByteArray();
// Wrap the bytes in a resource
ByteArrayResource resource = new ByteArrayResource(byteArray);
// Set the response headers based on the file extension
HttpHeaders headers = new HttpHeaders();
switch (fileExtension) {
case "png":
headers.setContentType(MediaType.IMAGE_PNG);
break;
case "gif":
headers.setContentType(MediaType.IMAGE_GIF);
break;
case "jpeg":
headers.setContentType(MediaType.IMAGE_JPEG);
break;
default:
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
break;
}
// Return the resource as the response body
return ResponseEntity.ok()
.headers(headers)
.contentLength(byteArray.length)
.body(resource);
}
}
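Since the controller encodes '/' in the HDFS path as '--' in the URL, a preview request from a client looks roughly like the sketch below; the host, port and file path are illustrative:
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class PreviewClientDemo {
    public static void main(String[] args) throws Exception {
        // /user/user001/upload/demo.png on HDFS becomes --user--user001--upload--demo.png in the URL
        String url = "http://localhost:8080/hadoop/--user--user001--upload--demo.png";
        HttpResponse<byte[]> response = HttpClient.newHttpClient()
                .send(HttpRequest.newBuilder(URI.create(url)).GET().build(),
                      HttpResponse.BodyHandlers.ofByteArray());
        System.out.println("Content-Type: " + response.headers().firstValue("Content-Type").orElse(""));
        System.out.println("Body length: " + response.body().length);
    }
}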