2.springboot操作hdfs

2 篇文章 0 订阅
1 篇文章 0 订阅

记录springboot操作hdfs过程中遇到的一些问题。

一、pom文件引入依赖:

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.3.6</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.3.6</version>
</dependency>

二、编写application.yml、config、service、controller。

demo代码主要参考地址:https://kgithub.com/WinterChenS/springboot-learning-experience/blob/master/spring-boot-hadoop

hdfs:
  hdfsPath: hdfs://192.xxx.xxx.xx:8888
  hdfsName: xxx
package com.guo.self.dubai.hadoop;

import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;


/**
 * Hadoop HDFS configuration: exposes the Hadoop {@code Configuration} and a
 * shared {@link FileSystem} client bean, built from {@code hdfs.*} properties.
 *
 * @author guoq
 * @date 2023/7/28
 */
@Slf4j
@Configuration
public class HadoopConfig {
    /** HDFS namenode URI, e.g. {@code hdfs://host:port} (property hdfs.hdfsPath). */
    @Value("${hdfs.hdfsPath}")
    private String hdfsPath;
    /** User name used when connecting to HDFS (property hdfs.hdfsName). */
    @Value("${hdfs.hdfsName}")
    private String hdfsName;

    /**
     * Hadoop configuration pointing {@code fs.defaultFS} at the configured namenode.
     */
    @Bean
    public org.apache.hadoop.conf.Configuration getConfiguration() {
        org.apache.hadoop.conf.Configuration config = new org.apache.hadoop.conf.Configuration();
        config.set("fs.defaultFS", hdfsPath);
        return config;
    }

    /**
     * Shared {@link FileSystem} client.
     *
     * <p>Fails fast with the original cause instead of swallowing the exception and
     * returning a {@code null} bean (which would only surface later as an NPE at the
     * first HDFS call).
     */
    @Bean
    public FileSystem getFileSystem() {
        try {
            return FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
        } catch (InterruptedException e) {
            // Restore the interrupt status before translating the exception.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while connecting to HDFS at " + hdfsPath, e);
        } catch (IOException | URISyntaxException e) {
            throw new IllegalStateException("Unable to create FileSystem for " + hdfsPath, e);
        }
    }
}
package com.guo.self.dubai.hadoop.hdfs;

import org.apache.hadoop.fs.BlockLocation;
import org.springframework.web.multipart.MultipartFile;

import java.util.List;
import java.util.Map;

/**
 * HDFS file operations exposed to the application layer.
 *
 * @author guoq
 * @date 2023/7/31
 */
public interface HDFSService {
    /** Returns whether the given HDFS path exists. */
    boolean existFile(String path);
    /** Lists a directory; each entry maps "filePath" / "fileStatus" for one child. */
    List<Map<String, Object>> readCatalog(String path);
    /** Reads the text content of an HDFS file. */
    String readFileContent(String path);
    /** Creates a file on HDFS from an uploaded multipart file (FileSystem.create + write). */
    boolean createFile(String path, MultipartFile file);
    /** Uploads an existing local file to HDFS (internally FileSystem.create + write). */
    boolean uploadFile(String path, String uploadPath);
    /** Downloads an existing HDFS file to the local filesystem. */
    boolean downloadFile(String path, String downloadPath);
    /** Deletes an HDFS file or directory. */
    boolean deleteFile(String path);
    /** Returns the block locations of an HDFS file (replica placement info). */
    BlockLocation[] getFileBlockLocations(String path);

}
package com.guo.self.dubai.hadoop.hdfs;

import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.fs.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;
import org.springframework.web.multipart.MultipartFile;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
 * @author:guoq
 * @date:2023/7/31
 * @descripion:
 */
@Service
@Slf4j
public class HDFSServiceImpl  implements  HDFSService{
    @Autowired
    private FileSystem fileSystem;
    @Override
    public boolean existFile(String path) {
        if (StringUtils.isEmpty(path)){
            return false;
        }
        Path src = new Path(path);
        try {
            return fileSystem.exists(src);
        } catch (IOException e) {
            log.error(e.getMessage());
        }
        return false;
    }

    @Override
    public List<Map<String, Object>> readCatalog(String path) {
        if (StringUtils.isEmpty(path)){
            return Collections.emptyList();
        }
        if (!existFile(path)){
            log.error("catalog is not exist!!");
            return Collections.emptyList();
        }

        Path src = new Path(path);
        FileStatus[] fileStatuses = null;
        try {
            fileStatuses = fileSystem.listStatus(src);
        } catch (IOException e) {
            log.error(e.getMessage());
        }
        List<Map<String, Object>> result = new ArrayList<>(fileStatuses.length);

        if (null != fileStatuses && 0 < fileStatuses.length) {
            for (FileStatus fileStatus : fileStatuses) {
                Map<String, Object> cataLogMap = new HashMap<>();
                cataLogMap.put("filePath", fileStatus.getPath());
                cataLogMap.put("fileStatus", fileStatus);
                result.add(cataLogMap);
            }
        }
        return result;
    }

    @Override
    public boolean createFile(String path, MultipartFile file) {
        boolean target = false;
        if (StringUtils.isEmpty(path)) {
            return false;
        }
        String fileName = file.getOriginalFilename();
//        String fileName = file.getName();
        Path newPath = new Path(path + "/" + fileName);

        FSDataOutputStream outputStream = null;
        try {
            outputStream = fileSystem.create(newPath);
            outputStream.write(file.getBytes());
            target = true;
        } catch (IOException e) {
            log.error(e.getMessage());
        } finally {
            if (null != outputStream) {
                try {
                    outputStream.close();
                } catch (IOException e) {
                    log.error(e.getMessage());
                }
            }
        }
        return target;
    }

    @Override
    public boolean uploadFile(String path, String uploadPath) {
        if (StringUtils.isEmpty(path) || StringUtils.isEmpty(uploadPath)) {
            return false;
        }

        Path clientPath = new Path(path);

        Path serverPath = new Path(uploadPath);

        try {
            fileSystem.copyFromLocalFile(false,clientPath,serverPath);
            return true;
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }
        return false;
    }

    @Override
    public boolean downloadFile(String path, String downloadPath) {
        if (StringUtils.isEmpty(path) || StringUtils.isEmpty(downloadPath)) {
            return false;
        }

        Path clienPath = new Path(path);

        Path targetPath = new Path(downloadPath);

        try {
            fileSystem.copyToLocalFile(false,clienPath, targetPath);
            return true;
        } catch (IOException e) {
            log.error(e.getMessage());
        }
        return false;
    }

    @Override
    public boolean deleteFile(String path) {
        boolean target = false;
        if (StringUtils.isEmpty(path)) {
            return false;
        }
        if (!existFile(path)) {
            return false;
        }
        Path src = new Path(path);
        try {
            target = fileSystem.deleteOnExit(src);
        } catch (IOException e) {
            log.error(e.getMessage());
        }
        return target;
    }

    @Override
    public BlockLocation[] getFileBlockLocations(String path) {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        if (!existFile(path)) {
            return null;
        }
        BlockLocation[] blocks = null;
        Path src = new Path(path);
        try{
            FileStatus fileStatus = fileSystem.getFileStatus(src);
            blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        }catch(Exception e){
            log.error(e.getMessage());
        }
        return blocks;
    }

    @Override
    public String readFileContent(String path) {
        if (StringUtils.isEmpty(path)){
            return null;
        }

        if (!existFile(path)) {
            return null;
        }

        Path src = new Path(path);

        FSDataInputStream inputStream = null;
        StringBuilder sb = new StringBuilder();
        try {
            inputStream = fileSystem.open(src);
            String lineText = "";
            while ((lineText = inputStream.readLine()) != null) {
                sb.append(lineText);
            }
        } catch (IOException e) {
            log.error(e.getMessage());
        } finally {
            if (null != inputStream) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    log.error(e.getMessage());
                }
            }
        }
        return sb.toString();
    }
}
package com.guo.self.dubai.hadoop;

import com.guo.self.dubai.hadoop.mapreduce.WordCountDriver;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.fs.FileSystem;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Hadoop HDFS configuration: exposes the Hadoop {@code Configuration} and a
 * shared {@link FileSystem} client bean, built from {@code hdfs.*} properties.
 *
 * @author guoq
 * @date 2023/7/28
 */
@Slf4j
@Configuration
public class HadoopConfig {
    /** HDFS namenode URI, e.g. {@code hdfs://host:port} (property hdfs.hdfsPath). */
    @Value("${hdfs.hdfsPath}")
    private String hdfsPath;
    /** User name used when connecting to HDFS (property hdfs.hdfsName). */
    @Value("${hdfs.hdfsName}")
    private String hdfsName;

    /**
     * Hadoop configuration pointing {@code fs.defaultFS} at the configured namenode.
     */
    @Bean
    public org.apache.hadoop.conf.Configuration getConfiguration() {
        org.apache.hadoop.conf.Configuration config = new org.apache.hadoop.conf.Configuration();
        config.set("fs.defaultFS", hdfsPath);
        return config;
    }

    /**
     * Shared {@link FileSystem} client.
     *
     * <p>Fails fast with the original cause instead of swallowing the exception and
     * returning a {@code null} bean (which would only surface later as an NPE at the
     * first HDFS call).
     */
    @Bean
    public FileSystem getFileSystem() {
        try {
            return FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
        } catch (InterruptedException e) {
            // Restore the interrupt status before translating the exception.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while connecting to HDFS at " + hdfsPath, e);
        } catch (IOException | URISyntaxException e) {
            throw new IllegalStateException("Unable to create FileSystem for " + hdfsPath, e);
        }
    }

}

三、启动报错:java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset

参考文章:windows搭建hadoop环境(解决HADOOP_HOME and hadoop.home.dir are unset)_hadoop_home and hadoop.home.dir are unset._ppandpp的博客-CSDN博客

  • 配置本地环境变量
  • 找到第一步解压的文件,找到hadoop.xxxx/ 与bin目录同级的etc文件,进入里层文件夹,编辑hadoop-env.cmd,找到set JAVA_HOME=%JAVA_HOME%,替换成你本地的jdk环境,提示如果本地配置文件夹中有空格,会不生效,可以新建一个链接文件,链接到目标文件,把链接文件夹设置到环境变量,命令:mklink/J D:\ProgramFiles "D:\Program Files",
  • 执行hadoop version即可

四、启动报错:java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.

参考文章:解决Hadoop在本地(windows)操作报错:Could not locate executable null\bin\winutils.exe in the Hadoop binaries._hadoop windows 解压失败_D奋斗的小菜鸟!的博客-CSDN博客,配置了CLASSPATH即可

五:正常编写代码

六:效果,能成功调用

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Spring Boot 可以通过使用 Hadoop 客户端库来实现与 Hadoop 的集成。下面是实现步骤: 1. 在 pom.xml 文件中添加以下依赖项: ``` <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>${hadoop.version}</version> </dependency> ``` 2. 在 application.properties 文件中添加以下配置: ``` hadoop.fs.defaultFS=hdfs://localhost:9000 ``` 3. 创建一个 Hadoop 配置 Bean,用于创建 Hadoop 配置对象: ``` @Configuration public class HadoopConfiguration { @Value("${hadoop.fs.defaultFS}") private String hdfsUri; @Bean public org.apache.hadoop.conf.Configuration configuration() { return new org.apache.hadoop.conf.Configuration(); } @Bean public FileSystem fileSystem() throws IOException { return FileSystem.get(URI.create(hdfsUri), configuration()); } } ``` 4. 创建一个服务类,用于执行 Hadoop 操作: ``` @Service public class HadoopService { private final FileSystem fileSystem; public HadoopService(FileSystem fileSystem) { this.fileSystem = fileSystem; } public void createDirectory(String directoryPath) throws IOException { Path path = new Path(directoryPath); if (!fileSystem.exists(path)) { fileSystem.mkdirs(path); } } public void uploadFile(String localFilePath, String hdfsFilePath) throws IOException { Path localPath = new Path(localFilePath); Path hdfsPath = new Path(hdfsFilePath); fileSystem.copyFromLocalFile(localPath, hdfsPath); } public void downloadFile(String hdfsFilePath, String localFilePath) throws IOException { Path hdfsPath = new Path(hdfsFilePath); Path localPath = new Path(localFilePath); fileSystem.copyToLocalFile(hdfsPath, localPath); } } ``` 5. 
在控制器中注入 HadoopService,并调用需要的操作: ``` @RestController public class HadoopController { private final HadoopService hadoopService; public HadoopController(HadoopService hadoopService) { this.hadoopService = hadoopService; } @PostMapping("/create-directory") public void createDirectory(@RequestParam String directoryPath) throws IOException { hadoopService.createDirectory(directoryPath); } @PostMapping("/upload-file") public void uploadFile(@RequestParam String localFilePath, @RequestParam String hdfsFilePath) throws IOException { hadoopService.uploadFile(localFilePath, hdfsFilePath); } @PostMapping("/download-file") public void downloadFile(@RequestParam String hdfsFilePath, @RequestParam String localFilePath) throws IOException { hadoopService.downloadFile(hdfsFilePath, localFilePath); } } ``` 这样就可以在 Spring Boot 应用中使用 Hadoop 了。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值