hadoop概念04-HDFS的Java API 操作(辅助理解HDFS的流程)

最新推荐文章于 2022-11-28 20:34:08 发布

xiaoxaoyu

最新推荐文章于 2022-11-28 20:34:08 发布

阅读量325

点赞数

分类专栏： Hadoop 文章标签： hadoop

本文链接：https://blog.csdn.net/xiaoxaoyu/article/details/110570475

版权

Hadoop 专栏收录该内容

13 篇文章 0 订阅

订阅专栏

HDFS常见的Java API 操作

1、HDFS 文件上传
2、HDFS文件下载
3、HDFS 文件夹删除
4、HDFS 文件名更改
5、HDFS 文件详情查看
6、HDFS 文件和文件夹判断
7、HDFS 的 I/O 流操作
8、HDFS 文件下载
9.定位文件读取

HDFS 的 Java API 中最核心的两个类是：FileSystem 和 Configuration

1、HDFS 文件上传

通过 configuration.set 方法设置副本的存储数量。
参数优先级排序依次降低：客户端代码中设置的值，classpath 下的用户自定义配置文件，然后是服务器的默认配置。

    /**
     * Uploads a local path to HDFS, requesting a replication factor of 2 from the client side.
     *
     * <p>The replication value is set directly on the client {@code Configuration} before the
     * file system handle is obtained.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if connecting or copying fails
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void testCopyFromLocalFile() throws URISyntaxException, IOException, InterruptedException {
        // 1. Build the client configuration and obtain the file system.
        Configuration configuration = new Configuration();
        configuration.set("dfs.replication", "2");
        // try-with-resources closes the FileSystem even when the upload throws.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root")) {
            // 2. Upload the local path to HDFS.
            fs.copyFromLocalFile(new Path("D:\\Hadoop\\hadoop"), new Path("/kb10/hadoop"));
        }
        System.out.println("ok");
    }

2、HDFS文件下载

    /**
     * Downloads {@code /kb10/hadoop} from HDFS to {@code D:/hadoop}.
     *
     * <p>NOTE: the call passes {@code delSrc = true}, so the HDFS source is removed after a
     * successful copy (this is effectively a move, not a plain download).
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if connecting or copying fails
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void testCopyToLocalFile() throws URISyntaxException, IOException, InterruptedException {
        // Obtain the file system.
        Configuration configuration = new Configuration();
        // try-with-resources closes the FileSystem even when the download throws.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root")) {
            // Arguments of copyToLocalFile:
            //   boolean delSrc                - whether to delete the source after copying
            //   Path src                      - HDFS path to download
            //   Path dst                      - local destination path
            //   boolean useRawLocalFileSystem - whether to use RawLocalFileSystem as the local
            //                                   file system; true bypasses the checksum file
            //                                   system (per the Hadoop FileSystem docs), i.e. it
            //                                   does NOT enable verification
            fs.copyToLocalFile(true, new Path("/kb10/hadoop"), new Path("D:/hadoop"), true);
        }
    }

3、HDFS 文件夹删除

    /**
     * Deletes the HDFS directory {@code /app/data/exam} and everything beneath it.
     *
     * @throws IOException          if connecting or deleting fails
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void testDelete() throws IOException, URISyntaxException, InterruptedException {
        Configuration configuration = new Configuration();
        // The original never closed the FileSystem; try-with-resources releases it in all cases.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root")) {
            // Use the explicit two-argument overload: the single-argument delete(Path) is
            // deprecated. recursive = true is required to remove a non-empty directory.
            fs.delete(new Path("/app/data/exam"), true);
        }
        System.out.println("ok");
    }

4、HDFS 文件名更改

    /**
     * Renames {@code /kb10/hadoop} to {@code /kb10/hadoophahaha} on HDFS.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if connecting fails or the rename is reported unsuccessful
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void testRename() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        Path source = new Path("/kb10/hadoop");
        Path target = new Path("/kb10/hadoophahaha");
        // try-with-resources closes the FileSystem even when the rename throws.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root")) {
            // rename signals failure through its boolean result, so check it instead of
            // printing "ok" unconditionally.
            if (!fs.rename(source, target)) {
                throw new IOException("rename failed: " + source + " -> " + target);
            }
        }
        System.out.println("ok");
    }

5、HDFS 文件详情查看

客户端是可以获取到每个文件的文件块位置信息，对于理解 MapReduce 有一定的帮助。
查看文件名称、权限、长度、块信息：

    /**
     * Recursively lists every file under the HDFS root and prints, for each one, its name,
     * length, permission, group and the hosts that store each of its blocks.
     *
     * @throws IOException          if connecting or listing fails
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void testListFiles() throws IOException, URISyntaxException, InterruptedException {
        Configuration configuration = new Configuration();
        // The original never closed the FileSystem; try-with-resources releases it in all cases.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root")) {
            // Second argument true = recurse into sub-directories.
            RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
            while (listFiles.hasNext()) {
                LocatedFileStatus status = listFiles.next();
                // File name
                System.out.println(status.getPath().getName());
                // Length
                System.out.println(status.getLen());
                // Permission
                System.out.println(status.getPermission());
                // Group
                System.out.println(status.getGroup());
                // Block locations of this file
                BlockLocation[] blockLocations = status.getBlockLocations();
                for (BlockLocation blockLocation : blockLocations) {
                    // Hosts holding this block
                    String[] hosts = blockLocation.getHosts();
                    for (String host : hosts) {
                        System.out.print(host + "\n");
                    }
                }
                System.out.println("------------------------------------------------");
            }
        }
    }

6、HDFS 文件和文件夹判断

    /**
     * Lists the direct children of the HDFS root and prints each name prefixed with
     * {@code f:} for a file or {@code d:} for anything else.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if connecting or listing fails
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void testListStatus() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // try-with-resources closes the FileSystem even when listing throws.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root")) {
            // Non-recursive listing of the root directory.
            FileStatus[] listStatus = fs.listStatus(new Path("/"));
            for (FileStatus fileStatus : listStatus) {
                // Distinguish files from everything else.
                if (fileStatus.isFile()) {
                    System.out.println("f:" + fileStatus.getPath().getName());
                } else {
                    System.out.println("d:" + fileStatus.getPath().getName());
                }
            }
        }
    }

7、HDFS 的 I/O 流操作

需求：将本地文件通过流的方式上传到 HDFS 文件系统。

    /**
     * Uploads the local file {@code D:\hadoop} to HDFS path {@code /mydir} using plain I/O
     * streams instead of {@code copyFromLocalFile}.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if connecting, opening either stream, or copying fails
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void putFileToHDFS() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // All three resources are closed in reverse order, even if opening a later one or the
        // copy itself throws; the original leaked them on any exception.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root");
             FileInputStream fis = new FileInputStream(new File("D:\\hadoop"));
             FSDataOutputStream fos = fs.create(new Path("/mydir"))) {
            // Stream copy: local input -> HDFS output.
            IOUtils.copyBytes(fis, fos, configuration);
        }
        System.out.println("ok");
    }

8、HDFS 文件下载

需求：使用流的方式从HDFS下载文件到本地磁盘上。

    /**
     * Downloads HDFS path {@code /mydir} to the local file {@code D:\testhadoop} using plain
     * I/O streams. (Despite the method name, the data flows from HDFS to the local disk.)
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if connecting, opening either stream, or copying fails
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void getFileToHDFS() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // All three resources are closed in reverse order, even if opening a later one or the
        // copy itself throws; the original leaked them on any exception.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root");
             FSDataInputStream fis = fs.open(new Path("/mydir"));
             FileOutputStream fos = new FileOutputStream(new File("D:\\testhadoop"))) {
            // Stream copy: HDFS input -> local output.
            IOUtils.copyBytes(fis, fos, configuration);
        }
        System.out.println("ok");
    }

9.定位文件读取

这里可以设置任意位置读取 hdfs 文件，对于 mapreduce 分片 inputsplit 和 spark 分区理解有一定帮助。

下载第一块：

    /**
     * Downloads the first 128 MB of {@code mytext.tar.gz} from HDFS into
     * {@code d:/mytext.tar.gz.part1}.
     *
     * <p>If the file is shorter than 128 MB, only the bytes that exist are written.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if connecting, opening either stream, reading or writing fails
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void readFileSeek1() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // Number of bytes to copy: 1024 bytes * 1024 * 128 = 128 MB, same total as the original loop.
        final long bytesToCopy = 1024L * 1024 * 128;
        // The original never closed the FileSystem and leaked the streams on any exception.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root");
             FSDataInputStream fis = fs.open(new Path("mytext.tar.gz"));
             FileOutputStream fos = new FileOutputStream(new File("d:/mytext.tar.gz.part1"))) {
            byte[] buf = new byte[1024];
            long remaining = bytesToCopy;
            while (remaining > 0) {
                // read may return fewer bytes than requested, or -1 at end of stream; the
                // original ignored this and always wrote the whole buffer, corrupting the output.
                int read = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
                if (read < 0) {
                    break;
                }
                fos.write(buf, 0, read);
                remaining -= read;
            }
        }
    }

下载第二块：

    /**
     * Downloads everything after the first 128 MB of {@code mytext.tar.gz} from HDFS into
     * {@code d:/mytext.tar.gz.part2}, by seeking the input stream before copying.
     *
     * @throws URISyntaxException   if the NameNode URI is malformed
     * @throws IOException          if connecting, opening either stream, seeking or copying fails
     * @throws InterruptedException if obtaining the file system for user "root" is interrupted
     */
    @Test
    public void readFileSeek2() throws URISyntaxException, IOException, InterruptedException {
        Configuration configuration = new Configuration();
        // The original never closed the FileSystem and leaked the streams on any exception.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.221.140:9000"), configuration, "root");
             FSDataInputStream fis = fs.open(new Path("mytext.tar.gz"))) {
            // Skip the first 128 MB; computed as long so larger offsets cannot overflow int.
            fis.seek(1024L * 1024 * 128);
            try (FileOutputStream fos = new FileOutputStream(new File("d:/mytext.tar.gz.part2"))) {
                // Copy from the seek position to the end of the file.
                IOUtils.copyBytes(fis, fos, configuration);
            }
        }
    }