【hadoop】java api的使用

最新推荐文章于 2024-06-24 03:15:45 发布

haohulala

最新推荐文章于 2024-06-24 03:15:45 发布

阅读量1.1k

点赞数 2

分类专栏： haoop

本文链接：https://blog.csdn.net/haohulala/article/details/108782500

版权

haoop 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

本文记录了使用Hadoop Java API进行文件操作的实践，包括创建、删除、读取文件，创建目录，上传下载文件及查看目录内容。遇到的问题包括Win10访问Ubuntu上的Hadoop集群需要修改配置以及重启后DataNode不工作的问题，通过修改配置和重新格式化解决。

摘要由CSDN通过智能技术生成

今天学习了hadoop中的java api的简单使用，这篇文章就记录一下今天的学习成果。

hadoop中java编程的基本套路就是需要先实例化一个FileSystem，因为每一次操作都要用到这个FileSystem，所以我们就将这部分代码抽取出来，写到一个工具类里。
Util.java

/**
 * Helper that builds a Hadoop {@link FileSystem} handle for an HDFS URI.
 *
 * <p>The HDFS URI defaults to {@code hdfs://192.168.228.141:9000} and can be replaced through
 * the constructors or {@link #setHdfsPath(String)}. A {@link Configuration} may be supplied the
 * same way; when none is supplied, {@link #getfs()} creates a default one.
 */
public class Util {

    // Configuration used to open the file system; stays null until supplied or until getfs() runs.
    private  Configuration conf;
    // URI of the HDFS NameNode to connect to.
    private  String hdfsPath = "hdfs://192.168.228.141:9000";
    // Handle produced by the last successful getfs() call, or injected via setFs().
    private  FileSystem fs;

    /** Creates a helper that uses the default HDFS URI and a default configuration. */
    Util(){

    }

    /**
     * Creates a helper that connects to the given HDFS URI.
     *
     * @param hdfsPath HDFS URI, e.g. {@code hdfs://host:port}
     */
    Util(String hdfsPath){
        this.hdfsPath = hdfsPath;
    }

    /**
     * Creates a helper that uses the given configuration with the default HDFS URI.
     *
     * @param conf Hadoop configuration to use when opening the file system
     */
    Util(Configuration conf){
        this.conf = conf;
    }

    /**
     * Creates a helper with both a custom HDFS URI and a custom configuration.
     *
     * @param hdfsPath HDFS URI, e.g. {@code hdfs://host:port}
     * @param conf     Hadoop configuration to use when opening the file system
     */
    Util(String hdfsPath, Configuration conf){
        this.hdfsPath = hdfsPath;
        this.conf = conf;
    }

    /**
     * Opens the file system identified by the current HDFS URI and stores it in this helper.
     *
     * <p>A configuration supplied through a constructor or {@link #setConf(Configuration)} is
     * used as-is; a default {@link Configuration} is created only when none has been supplied.
     *
     * @return the opened file system; if opening fails the stack trace is printed and the
     *         previously stored handle is returned, which is {@code null} when none exists
     */
    public  FileSystem getfs(){
        // Only fall back to a default configuration; never overwrite one supplied by the caller.
        if (this.conf == null) {
            this.conf = new Configuration();
        }
        try {
            this.fs = FileSystem.get(new URI(this.hdfsPath), this.conf);
        } catch (IOException | URISyntaxException e) {
            e.printStackTrace();
        }
        return this.fs;
    }

    /** @return the configuration currently held, or {@code null} if none has been set or created */
    public Configuration getConf() {
        return conf;
    }

    /** @param conf configuration to use for subsequent {@link #getfs()} calls */
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /** @return the HDFS URI used by {@link #getfs()} */
    public String getHdfsPath() {
        return hdfsPath;
    }

    /** @param hdfsPath HDFS URI to use for subsequent {@link #getfs()} calls */
    public void setHdfsPath(String hdfsPath) {
        this.hdfsPath = hdfsPath;
    }

    /** @return the stored file system handle without opening a new one; may be {@code null} */
    public FileSystem getFs() {
        return fs;
    }

    /** @param fs file system handle to store in this helper */
    public void setFs(FileSystem fs) {
        this.fs = fs;
    }

}

其中hdfsPath应该写自己的hadoop集群中namenode的ip地址，端口号要和core-site.xml中fs.defaultFS配置的端口保持一致（本文配置的是9000）。
有了这个工具类之后，我们在之后的操作中只需要获取一次FileSystem就可以一直使用了。
下面我们来封装hadoop中java api。
首先是创建文件

// Create a file and write data into it.
    /**
     * Creates {@code filename} on the file system and writes {@code data} into it.
     *
     * <p>The string is encoded with the platform default charset ({@code String.getBytes()}).
     *
     * @param filename path of the file to create
     * @param data     text to write into the new file
     * @return 1 if the file was created and written, 0 if an {@link IOException} occurred,
     *         -1 if the file already exists
     */
    public int createFile(String filename, String data){
        Path target = new Path(filename);
        try {
            if (this.fs.exists(target)) {
                System.out.println("文件已经存在，创建失败");
                return -1;  // file already exists
            }
            // Encode before creating the file so a null data argument leaves no empty file behind.
            byte[] buff = data.getBytes();
            // try-with-resources closes the stream exactly once, even when write() throws.
            try (FSDataOutputStream outputStream = this.fs.create(target)) {
                outputStream.write(buff, 0, buff.length);
            }
            System.out.println("文件创建成功，并且已经写入数据");
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;
    }

// Create an empty file without writing any content.
    /**
     * Creates {@code filename} on the file system and closes it immediately.
     *
     * @param filename path of the file to create
     * @return 1 if the file was created, 0 if an {@link IOException} occurred,
     *         -1 if the file already exists
     */
    public int createFile(String filename){
        final Path target = new Path(filename);
        int status = 0;
        try {
            if (!this.fs.exists(target)) {
                FSDataOutputStream created = this.fs.create(target);
                System.out.println("创建文件成功，没有写入任何数据");
                created.close();
                status = 1;
            } else {
                System.out.println("文件存在，创建失败");
                status = -1;  // file already exists
            }
        } catch (IOException e) {
            e.printStackTrace();
            status = 0;
        }
        return status;
    }

接着是删除文件

// Delete a file.
    /**
     * Deletes {@code filename}; the delete call is made with the recursive flag set to true.
     *
     * @param filename path to delete
     * @return 1 if the path was deleted, 0 if it does not exist, the delete call reported
     *         failure, or an {@link IOException} occurred
     */
    public int deleteFile(String filename){
        final Path target = new Path(filename);
        try {
            if (!fs.exists(target)) {
                System.out.println("文件不存在，删除失败");
                return 0;
            }
            return fs.delete(target, true) ? 1 : 0;
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        }
    }

接着是创建目录

// Create a directory.
    /**
     * Creates the directory {@code dir} unless something already exists at that path.
     *
     * @param dir path of the directory to create
     * @return 1 if the directory was created, 0 if the path already exists, creation was
     *         reported as failed, or an {@link IOException} occurred
     */
    public int mkdirs(String dir){
        final Path target = new Path(dir);
        int status = 0;
        try {
            if (fs.exists(target)) {
                System.out.println("文件夹存在，创建失败");
            } else if (fs.mkdirs(target)) {
                System.out.println("创建成功");
                status = 1;
            } else {
                System.out.println("创建失败");
            }
        } catch (IOException e){
            e.printStackTrace();
        }
        return status;
    }

接着是读取文件并显示

// Read a file and print it.
    /**
     * Reads {@code filename} line by line and prints every line to standard output.
     *
     * <p>Bytes are decoded with the platform default charset (no charset is passed to
     * {@link InputStreamReader}).
     *
     * @param filename path of the file to read
     * @return 1 if the whole file was read, 0 if the file does not exist or an
     *         {@link IOException} occurred
     */
    public int readFile(String filename){
        Path target = new Path(filename);
        try {
            if (!fs.exists(target)) {
                System.out.println("文件不存在，读取失败");
                return 0;
            }
            // Closing the reader also closes the underlying HDFS stream, even if readLine() throws.
            try (BufferedReader buff = new BufferedReader(new InputStreamReader(fs.open(target)))) {
                String line;
                while ((line = buff.readLine()) != null) {
                    System.out.println(line);
                }
            }
            return 1;
        } catch (IOException e){
            e.printStackTrace();
        }
        return 0;
    }

接着是上传本地文件到hadoop

    /**
     * Uploads a local file to Hadoop.
     *
     * @param localdir  path of the local source
     * @param remotedir destination path on the file system
     * @return 1 if the copy succeeded, 0 if an {@link IOException} occurred
     */
    public int upload(String localdir, String remotedir){
        try {
            fs.copyFromLocalFile(new Path(localdir), new Path(remotedir));
            System.out.println("上传成功,上传到：" + remotedir);
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
        }
        // A failed upload must not be reported as success.
        return 0;
    }

接着是从hadoop下载文件到本地，这个函数里面我没有直接使用java api，而是使用FileSystem打开文件，然后使用IOUtils进行二进制数据复制，原因是使用java api的时候总是报错，索性自己写了，经过测试，可以下载文本文件，也可以下载图片。

 // Download a file from Hadoop to the local disk.
    /**
     * Copies the remote file {@code remotedir} into a local file whose name is
     * {@code localdir} concatenated with the last component of the remote path.
     *
     * <p>{@code localdir} is prepended verbatim, so it must already end with a path separator
     * for the file to land inside that directory.
     *
     * @param localdir  local directory prefix, including the trailing separator
     * @param remotedir path of the remote file to download
     * @return 1 if the file was copied, 0 if an argument or the file system handle is missing
     *         or an {@link IOException} occurred
     */
    public int download(String localdir, String remotedir){
        // Explicit checks instead of catching NullPointerException.
        if (remotedir == null || fs == null) {
            System.err.println("下载失败：远程路径或FileSystem为空");
            return 0;
        }

        Path remotePath = new Path(remotedir);
        // Path.getName() yields the last path component and does not fail for "/".
        String filename = localdir + remotePath.getName();

        // The remote stream is opened first so a missing remote file creates no empty local file;
        // try-with-resources closes both streams exactly once on every exit path.
        try (FSDataInputStream inputStream = fs.open(remotePath);
             FileOutputStream localFile = new FileOutputStream(new File(filename))) {
            // Binary copy; closing is left to try-with-resources (last argument false).
            IOUtils.copyBytes(inputStream, localFile, 2048, false);
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        }
        System.out.println("文件下载完成："+filename);
        return 1;
    }

最后是查看给定目录下的所有文件，使用递归的方式，有点慢

// List everything under a directory.
    /**
     * Prints every entry under {@code dir}, descending into sub-directories recursively.
     * Directories are printed with the prefix {@code "d :"} and other entries with {@code "f :"}.
     * An {@link IOException} while listing a directory is printed and ends that directory's listing.
     *
     * @param dir path of the directory to list
     */
    public void getdirs(String dir){
        try {
            for (FileStatus entry : fs.listStatus(new Path(dir))) {
                String location = entry.getPath().toString();
                if (!entry.isDirectory()) {
                    System.out.println("f :" + location);
                    continue;
                }
                System.out.println("d :" + location);
                // Descend into the sub-directory right after printing it.
                getdirs(location);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

在学习的过程中，我遇到了两个问题，这里记录一下。

第一个问题是在win10下访问不到ubuntu虚拟机上的hadoop集群
解决的方法是修改hadoop/etc/hadoop/core-site.xml中的url，将原来的localhost修改成了hadoop-allow，并且在 **/etc/hosts** 中添加一条ip映射 **0.0.0.0 hadoop-allow**，这样就代表所有人都可以访问到hadoop集群的9000端口。
在linux命令行中输入 netstat -tpnl，当显示内容如下的时候就说明任何主机都可以正常访问hadoop了
在这里插入图片描述
主要就是要看9000端口对应的是0.0.0.0还是127.0.0.1。学过计算机网络的同学应该都还记得，监听在0.0.0.0上表示监听本机的所有网络接口，其他主机可以连接进来；127.0.0.1是回环地址，只有本机自己能访问。

第二个问题是在ubuntu关机重启后datanode不工作了
使用50070界面查看不到datanode，解决的方法比较暴力，就是将hadoop/etc/hadoop/hdfs-site.xml文件中设置的数据存储路径对应的目录删除，然后重新创建，创建完成后使用 hadoop namenode -format 初始化之后就可以正常使用了（注意：重新格式化会清空HDFS上已有的数据）。这个问题的原理我不懂，如果有懂的同学欢迎在下面留言告诉我。

最后放上完整的代码
hdfsAPI.java

import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;



import java.io.*;

/**
 * Thin wrappers around common HDFS operations: create, delete, read, mkdir, upload,
 * download and recursive listing.
 *
 * <p>Most methods report their outcome as an int status code and print any
 * {@link IOException} stack trace instead of propagating it.
 */
public class hdfsAPI {


    public static void main(String[] args) {

        hdfsAPI api = new hdfsAPI();
        api.getdirs("/input");


    }

    // File system handle shared by every operation; obtained once in the constructor.
    private FileSystem fs;

    /** Obtains the file system handle from a default-constructed {@code Util}. */
    hdfsAPI(){
        this.fs = new Util().getfs();
    }

    /**
     * Creates {@code filename} on the file system and writes {@code data} into it.
     *
     * <p>The string is encoded with the platform default charset ({@code String.getBytes()}).
     *
     * @param filename path of the file to create
     * @param data     text to write into the new file
     * @return 1 if the file was created and written, 0 if an {@link IOException} occurred,
     *         -1 if the file already exists
     */
    public int createFile(String filename, String data){
        Path target = new Path(filename);
        try {
            if (this.fs.exists(target)) {
                System.out.println("文件已经存在，创建失败");
                return -1;  // file already exists
            }
            // Encode before creating the file so a null data argument leaves no empty file behind.
            byte[] buff = data.getBytes();
            // try-with-resources closes the stream exactly once, even when write() throws.
            try (FSDataOutputStream outputStream = this.fs.create(target)) {
                outputStream.write(buff, 0, buff.length);
            }
            System.out.println("文件创建成功，并且已经写入数据");
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;
    }

    /**
     * Creates {@code filename} on the file system without writing any content.
     *
     * @param filename path of the file to create
     * @return 1 if the file was created, 0 if an {@link IOException} occurred,
     *         -1 if the file already exists
     */
    public int createFile(String filename){
        Path target = new Path(filename);
        try {
            if (this.fs.exists(target)) {
                System.out.println("文件存在，创建失败");
                return -1;  // file already exists
            }
            FSDataOutputStream outputStream = this.fs.create(target);
            System.out.println("创建文件成功，没有写入任何数据");
            outputStream.close();
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;
    }


    /**
     * Deletes {@code filename}; the delete call is made with the recursive flag set to true.
     *
     * @param filename path to delete
     * @return 1 if the path was deleted, 0 if it does not exist, the delete call reported
     *         failure, or an {@link IOException} occurred
     */
    public int deleteFile(String filename){
        Path target = new Path(filename);
        try {
            if (!fs.exists(target)) {
                System.out.println("文件不存在，删除失败");
                return 0;
            }
            return fs.delete(target, true) ? 1 : 0;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;

    }


    /**
     * Creates the directory {@code dir} unless something already exists at that path.
     *
     * @param dir path of the directory to create
     * @return 1 if the directory was created, 0 if the path already exists, creation was
     *         reported as failed, or an {@link IOException} occurred
     */
    public int mkdirs(String dir){
        Path target = new Path(dir);
        try {
            if (fs.exists(target)) {
                System.out.println("文件夹存在，创建失败");
                return 0;
            }
            if (fs.mkdirs(target)) {
                System.out.println("创建成功");
                return 1;
            }
            System.out.println("创建失败");
            return 0;
        } catch (IOException e){
            e.printStackTrace();
        }
        return 0;
    }


    /**
     * Reads {@code filename} line by line and prints every line to standard output.
     *
     * <p>Bytes are decoded with the platform default charset (no charset is passed to
     * {@link InputStreamReader}).
     *
     * @param filename path of the file to read
     * @return 1 if the whole file was read, 0 if the file does not exist or an
     *         {@link IOException} occurred
     */
    public int readFile(String filename){
        Path target = new Path(filename);
        try {
            if (!fs.exists(target)) {
                System.out.println("文件不存在，读取失败");
                return 0;
            }
            // Closing the reader also closes the underlying HDFS stream, even if readLine() throws.
            try (BufferedReader buff = new BufferedReader(new InputStreamReader(fs.open(target)))) {
                String line;
                while ((line = buff.readLine()) != null) {
                    System.out.println(line);
                }
            }
            return 1;
        } catch (IOException e){
            e.printStackTrace();
        }
        return 0;
    }


    /**
     * Uploads a local file to Hadoop.
     *
     * @param localdir  path of the local source
     * @param remotedir destination path on the file system
     * @return 1 if the copy succeeded, 0 if an {@link IOException} occurred
     */
    public int upload(String localdir, String remotedir){
        try {
            fs.copyFromLocalFile(new Path(localdir), new Path(remotedir));
            System.out.println("上传成功,上传到：" + remotedir);
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
        }
        // A failed upload must not be reported as success.
        return 0;
    }

    /**
     * Copies the remote file {@code remotedir} into a local file whose name is
     * {@code localdir} concatenated with the last component of the remote path.
     *
     * <p>{@code localdir} is prepended verbatim, so it must already end with a path separator
     * for the file to land inside that directory.
     *
     * @param localdir  local directory prefix, including the trailing separator
     * @param remotedir path of the remote file to download
     * @return 1 if the file was copied, 0 if an argument or the file system handle is missing
     *         or an {@link IOException} occurred
     */
    public int download(String localdir, String remotedir){
        // Explicit checks instead of catching NullPointerException.
        if (remotedir == null || fs == null) {
            System.err.println("下载失败：远程路径或FileSystem为空");
            return 0;
        }

        Path remotePath = new Path(remotedir);
        // Path.getName() yields the last path component and does not fail for "/".
        String filename = localdir + remotePath.getName();

        // The remote stream is opened first so a missing remote file creates no empty local file;
        // try-with-resources closes both streams exactly once on every exit path.
        try (FSDataInputStream inputStream = fs.open(remotePath);
             FileOutputStream localFile = new FileOutputStream(new File(filename))) {
            // Binary copy; closing is left to try-with-resources (last argument false).
            IOUtils.copyBytes(inputStream, localFile, 2048, false);
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        }
        System.out.println("文件下载完成："+filename);
        return 1;
    }

    /**
     * Prints every entry under {@code dir}, descending into sub-directories recursively.
     * Directories are printed with the prefix {@code "d :"} and other entries with {@code "f :"}.
     * An {@link IOException} while listing a directory is printed and ends that directory's listing.
     *
     * @param dir path of the directory to list
     */
    public void getdirs(String dir){
        try {
            for (FileStatus fileStatus : fs.listStatus(new Path(dir))) {
                String location = fileStatus.getPath().toString();
                if (fileStatus.isDirectory()) {
                    System.out.println("d :" + location);
                    // Descend into the sub-directory right after printing it.
                    getdirs(location);
                } else {
                    System.out.println("f :" + location);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

    }


}