Integrating Hadoop with Spring Boot to upload files

P.S. Spring Boot version: 2.5

1. Add the Maven dependencies
<!-- Hadoop dependencies -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.3</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.3</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
2. YAML configuration
# HDFS configuration
hadoop:
  # HDFS (NameNode) address
  fsUri: hdfs://xxx.xxx.xxx.xx:9000
  # upload path
  input:
    path: /datasource/dailyData/
  # download path
  output:
    path: /datasource/dailyData/
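These properties are injected with @Value in the classes below. As an alternative, they could be bound to a typed bean with @ConfigurationProperties; a minimal sketch, where the HadoopProperties class name is an assumption and the "output" branch is omitted for brevity:

package com.dfxn.system.config;

import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

// Hypothetical typed holder for the "hadoop" properties above
@Component
@ConfigurationProperties(prefix = "hadoop")
public class HadoopProperties {

    private String fsUri;          // maps hadoop.fsUri
    private Input input = new Input();

    public static class Input {
        private String path;       // maps hadoop.input.path
        public String getPath() { return path; }
        public void setPath(String path) { this.path = path; }
    }

    public String getFsUri() { return fsUri; }
    public void setFsUri(String fsUri) { this.fsUri = fsUri; }
    public Input getInput() { return input; }
    public void setInput(Input input) { this.input = input; }
}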
3. Write the configuration class
package com.dfxn.system.config;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * @Author 
 * @Date 2024/7/22 11:50 AM
 * @Version 1.0
 */

@org.springframework.context.annotation.Configuration
@ConditionalOnProperty(name="hadoop.fsUri")
public class HadoopConfig {

    @Value("${hadoop.fsUri}")
    private String fsUri;


    /**
     * Configuration conf = new Configuration();
     * When a Configuration object is created, its constructor loads Hadoop's default
     * configuration files (core-site.xml, plus hdfs-site.xml once the HDFS classes are
     * loaded). These carry the parameters a client needs to reach HDFS, most importantly
     * fs.defaultFS (formerly fs.default.name), the address of the HDFS cluster.
     * In short, Configuration holds Hadoop's configuration information.
     * @return the configured FileSystem instance
     */
    @Bean
    public FileSystem fileSystem() throws IOException, InterruptedException, URISyntaxException {
        // Create a Hadoop Configuration instance
        Configuration configuration = new Configuration();
        // Point it at the HDFS cluster
        configuration.set("fs.defaultFS", fsUri);
        // Obtain a FileSystem for the given URI, configuration, and user "root"
        return FileSystem.get(new URI(fsUri), configuration, "root");
    }
}
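The HDFS user "root" is hardcoded in the bean above. If your cluster uses a different user, one variation (an assumption, not part of the original setup) is to read it from a property with "root" as the fallback:

    // Hypothetical property; defaults to "root" when hadoop.user is absent
    @Value("${hadoop.user:root}")
    private String hdfsUser;

    @Bean
    public FileSystem fileSystem() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", fsUri);
        // Pass the configured user instead of the literal "root"
        return FileSystem.get(new URI(fsUri), configuration, hdfsUser);
    }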

4. Write the utility class
package com.dfxn.system.util;

import lombok.extern.log4j.Log4j2;
import org.springframework.stereotype.Component;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.beans.factory.annotation.Value;

import javax.annotation.Resource;
import java.io.*;
/**
 * Hadoop utility class
 * @Author
 * @Date 2024/7/23 2:54 PM
 * @Version 1.0
 */
@Component
@Log4j2
public class HadoopUtil {

    @Value("${hadoop.fsUri}")
    private String fsUri;

    @Value("${hadoop.input.path}")
    private String inputPath;

    @Resource
    private FileSystem fileSystem;


    /**
     * Upload from an input stream
     * @param inputStream source stream
     * @param hdfsFilePath target HDFS path
     * @throws IOException
     */
    public void uploadFileToHDFS(InputStream inputStream, String hdfsFilePath) throws IOException {
        Path path = new Path(hdfsFilePath);
        try (FSDataOutputStream outputStream = fileSystem.create(path)) {
            byte[] buffer = new byte[4096];
            int bytesRead;
            // read() returns -1 at end of stream
            while ((bytesRead = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, bytesRead);
            }
            log.info("File uploaded successfully: " + hdfsFilePath);
        } finally {
            inputStream.close();
        }
    }

    /**
     * Upload a local file
     * @param localFilePath local source path
     * @param hdfsFilePath target HDFS path
     * @throws IOException
     */
    public void uploadLocalFileToHDFS(String localFilePath, String hdfsFilePath) throws IOException {
        try (InputStream inputStream = new FileInputStream(localFilePath)) {
            uploadFileToHDFS(inputStream, hdfsFilePath);
        } catch (FileNotFoundException e) {
            throw new IOException("File not found: " + localFilePath, e);
        }
    }

    /**
     * Download a file
     * @param hdfsFilePath HDFS source path
     * @param outputStream target stream
     * @throws IOException
     */
    public void downloadFileFromHDFS(String hdfsFilePath, OutputStream outputStream) throws IOException {
        Path path = new Path(hdfsFilePath);
        try (FSDataInputStream inputStream = fileSystem.open(path)) {
            byte[] buffer = new byte[4096];
            int bytesRead;
            // read() returns -1 at end of stream
            while ((bytesRead = inputStream.read(buffer)) != -1) {
                outputStream.write(buffer, 0, bytesRead);
            }
        }
    }

    /**
     * Create a directory
     * @param hdfsDirPath directory path on HDFS
     * @throws IOException
     */
    public void createDirectoryInHDFS(String hdfsDirPath) throws IOException {
        Path path = new Path(hdfsDirPath);
        // Check whether the directory already exists
        if (fileSystem.exists(path)) {
            log.info("Directory already exists: " + hdfsDirPath);
            return;
        }
        // Create the directory
        boolean success = fileSystem.mkdirs(path);
        if (!success) {
            throw new IOException("Failed to create directory: " + hdfsDirPath);
        }
        log.info("Directory created: " + hdfsDirPath);
    }
}
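A quick usage sketch, callable from any bean where HadoopUtil is injected (the local paths and file names below are made up for illustration):

// Hypothetical paths; createDirectoryInHDFS is idempotent thanks to the exists() check
hadoopUtil.createDirectoryInHDFS("/datasource/dailyData/");
hadoopUtil.uploadLocalFileToHDFS("D:/data/sample.csv", "/datasource/dailyData/sample.csv");
try (OutputStream out = new FileOutputStream("D:/data/sample-copy.csv")) {
    hadoopUtil.downloadFileFromHDFS("/datasource/dailyData/sample.csv", out);
}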

5. Using it in a controller
import com.dfxn.system.util.HadoopUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletResponse;
import java.io.IOException;

@RestController
@RequestMapping("/hadoop/files")
public class HadoopController {

    @Autowired
    private HadoopUtil hadoopUtil;

    /**
     * HDFS URI
     */
    @Value("${hadoop.fsUri}")
    private String fsUri;

    /**
     * Upload path
     */
    @Value("${hadoop.input.path}")
    private String inputPath;

    /**
     * Upload a local file
     * @param srcFile local file path
     * @return
     * @throws IOException
     */
    @PostMapping("/uploadLocal")
    public String uploadLocal(@RequestParam String srcFile) throws IOException {
        // The target file name must be appended when uploading ("xx.csv" is a placeholder)
        String uploadUrl = fsUri + inputPath + "xx.csv";
        hadoopUtil.uploadLocalFileToHDFS(srcFile, uploadUrl);
        return "upload";
    }

    /**
     * Download a file
     * @param srcFile file name
     * @param response
     */
    @GetMapping("/download")
    public void downloadFile(@RequestParam String srcFile, HttpServletResponse response) {
        // Target path; inputPath already ends with '/', so no extra separator is needed
        String hdfsFilePath = fsUri + inputPath + srcFile;
        response.setContentType("application/octet-stream");
        response.setHeader("Content-Disposition", "attachment; filename=" + srcFile);
        try {
            hadoopUtil.downloadFileFromHDFS(hdfsFilePath, response.getOutputStream());
            response.flushBuffer();
        } catch (IOException e) {
            e.printStackTrace();
            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        }
    }
}
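The /uploadLocal endpoint above reads a file from the server's own disk. For files sent by an HTTP client, a multipart endpoint can reuse the stream-based uploadFileToHDFS; a minimal sketch (this endpoint is an addition to the original controller and requires org.springframework.web.multipart.MultipartFile):

    @PostMapping("/upload")
    public String upload(@RequestParam("file") MultipartFile file) throws IOException {
        // Uses the client-supplied file name; sanitize it before production use
        String uploadUrl = fsUri + inputPath + file.getOriginalFilename();
        hadoopUtil.uploadFileToHDFS(file.getInputStream(), uploadUrl);
        return "upload";
    }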
Problem: when running the application locally on Windows, you may hit

 HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems
	at org.apache.hadoop.util.Shell.fileNotFoundException(Shell.java:547) ~[hadoop-common-3.2.4.jar:na]

Download the winutils binaries (the trace above happens to come from hadoop-common 3.2.4; pick the build matching the Hadoop version you actually run):
https://gitee.com/nkuhyx/winutils.git

After unpacking, configure the environment variables.

Add a system variable:
HADOOP_HOME
D:\work\hadoop-3.2.1

Append to Path:
%HADOOP_HOME%\bin

Then restart the system.
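Alternatively, if you cannot change system-wide environment variables, hadoop.home.dir can be set programmatically before the first Hadoop class is loaded; a sketch, where the class name and the local path are just examples:

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

@SpringBootApplication
public class Application {
    public static void main(String[] args) {
        // Must run before any Hadoop class (e.g. FileSystem) is touched
        System.setProperty("hadoop.home.dir", "D:\\work\\hadoop-3.2.1");
        SpringApplication.run(Application.class, args);
    }
}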
