Common HDFS API Usage and Programming


1 Add dependencies to the pom file

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0-cdh5.16.2</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
</dependencies>
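
Note: CDH-suffixed artifacts such as 2.6.0-cdh5.16.2 are not published to Maven Central, so the build usually also needs the Cloudera repository in the pom; a minimal sketch (the repository id is arbitrary):

<!-- assumption: Cloudera's public Maven repository hosts the CDH artifacts -->
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>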

2 Calling the HDFS API

2.1 Creating a directory

package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) fileSystem.close();
    }

    @Test
    public void mkdir() throws Exception {
        boolean res = fileSystem.mkdirs(new Path("/demo"));
        System.out.println(res);
    }

}

Equivalent to hadoop fs -mkdir / hdfs dfs -mkdir
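
mkdirs() also has an overload that takes an FsPermission, setting the directory mode at creation time. A small sketch for the same test class; the path and the 0755 mode here are illustrative assumptions, not from the original test:

    @Test
    public void mkdirWithPermission() throws Exception {
        // hypothetical path; 0755 = rwxr-xr-x
        fileSystem.mkdirs(new Path("/demo/with-perms"),
                new org.apache.hadoop.fs.permission.FsPermission((short) 0755));
    }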

 

2.2 Uploading a local file to HDFS

2.2.1 Using the HDFS API

package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) fileSystem.close();
    }

    @Test
    public void copyFromLocalFile() throws Exception {
        // local source file path
        Path src = new Path("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo.txt");
        // destination path on HDFS
        Path dst = new Path("/demo/demo.txt");
        fileSystem.copyFromLocalFile(src, dst);
    }
}
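
copyFromLocalFile() also has a four-argument overload that controls whether the local source is deleted and whether an existing target is overwritten; a sketch using the same paths as above:

    @Test
    public void copyFromLocalFileOverwrite() throws Exception {
        Path src = new Path("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo.txt");
        Path dst = new Path("/demo/demo.txt");
        // delSrc = false keeps the local file; overwrite = true replaces dst if present
        fileSystem.copyFromLocalFile(false, true, src, dst);
    }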

2.2.2 Uploading a file with IO streams

 

package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) fileSystem.close();
    }
    
    @Test
    public void copyFromLocalFileByIo() throws Exception {
        FSDataOutputStream outputStream = fileSystem.create(new Path("/demo/demo2.txt"), true);
        BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(new File("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo.txt")));
        IOUtils.copyBytes(inputStream, outputStream, 2048);
        IOUtils.closeStream(outputStream);
        IOUtils.closeStream(inputStream);
    }
}

 

Equivalent to hadoop fs -put / hdfs dfs -put
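
A try-with-resources variant of the same copy (same paths assumed) closes both streams even when copyBytes throws, which the explicit closeStream calls above do not guarantee:

    @Test
    public void copyFromLocalFileByIoTryWithResources() throws Exception {
        try (FSDataOutputStream outputStream = fileSystem.create(new Path("/demo/demo2.txt"), true);
             BufferedInputStream inputStream = new BufferedInputStream(
                     new FileInputStream("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo.txt"))) {
            // both streams are closed automatically, even if the copy fails
            IOUtils.copyBytes(inputStream, outputStream, 2048);
        }
    }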

 

2.3 Downloading a file from HDFS

2.3.1 Using the HDFS API

package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) fileSystem.close();
    }

    @Test
    public void copyToLocalFile() throws Exception {
        // HDFS source path
        Path src = new Path("/demo/demo.txt");
        // local destination path
        Path dst = new Path("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo1.txt");
        fileSystem.copyToLocalFile(src, dst);
    }
}
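
copyToLocalFile() writes a client-side .crc checksum file next to the download and, on Windows, needs the native Hadoop binaries (winutils). The four-argument overload can route the write through RawLocalFileSystem instead; a hedged sketch with the same paths:

    @Test
    public void copyToLocalFileRaw() throws Exception {
        // delSrc = false; useRawLocalFileSystem = true skips the local .crc file
        fileSystem.copyToLocalFile(false, new Path("/demo/demo.txt"),
                new Path("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo1.txt"), true);
    }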

 

2.3.2 Using IO streams

package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.*;
import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) fileSystem.close();
    }
    
    @Test
    public void copyToLocalFileByIo() throws Exception {
        FSDataInputStream inputStream = fileSystem.open(new Path("/demo/demo1.txt"));
        BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(new File("E:\\workspace\\java\\hadoop-project\\hdfs-basic\\data\\demo2.txt")));
        IOUtils.copyBytes(inputStream, outputStream, 2048);
        IOUtils.closeStream(inputStream);
        IOUtils.closeStream(outputStream);
    }
}

Equivalent to hadoop fs -get / hdfs dfs -get
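
Unlike a plain java.io stream, FSDataInputStream is seekable, which is handy for reading a slice of a large HDFS file. A small sketch for the same class; the 10-byte offset is an arbitrary assumption:

    @Test
    public void readFromOffsetByIo() throws Exception {
        try (FSDataInputStream inputStream = fileSystem.open(new Path("/demo/demo1.txt"))) {
            // jump to byte 10, then copy the rest of the file to stdout
            inputStream.seek(10);
            IOUtils.copyBytes(inputStream, System.out, 2048, false);
        }
    }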

 

2.4 Renaming a file on HDFS

package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) fileSystem.close();
    }

    @Test
    public void reName() throws Exception {
        // original file path
        Path src = new Path("/demo/demo.txt");
        // new file path
        Path dst = new Path("/demo/demo1.txt");
        boolean res = fileSystem.rename(src, dst);
        System.out.println(res);
    }

}

Equivalent to hadoop fs -mv / hdfs dfs -mv
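
Note that rename() returns false rather than throwing when the destination already exists, so a guard with exists() makes that failure mode visible; a sketch for the same class:

    @Test
    public void reNameWithGuard() throws Exception {
        Path src = new Path("/demo/demo.txt");
        Path dst = new Path("/demo/demo1.txt");
        // rename() returns false instead of throwing if dst already exists
        if (fileSystem.exists(dst)) {
            System.out.println("target already exists, skipping rename");
        } else {
            System.out.println(fileSystem.rename(src, dst));
        }
    }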

 

2.5 Listing files

package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) fileSystem.close();
    }
 
    @Test
    public void listFiles() throws Exception {
        /**
         * Path f : path to list
         * boolean recursive : whether to recurse into subdirectories
         */
        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/demo"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            String path = file.getPath().toString().trim();
            String isdic = file.isDirectory() ? "directory" : "file";
            String owner = file.getOwner();
            System.out.println(isdic + "\t" + path + "\t" + owner);
            // block locations of this file
            BlockLocation[] blockLocations = file.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                // hosts that store a replica of this block
                String[] hosts = blockLocation.getHosts();
                for (String host : hosts) {
                    System.out.println(host);
                }
            }
        }
    }
}

Equivalent to hadoop fs -ls / hdfs dfs -ls
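
listFiles() only yields files, so the isDirectory() branch above never fires; listStatus() also returns the immediate child directories (non-recursively). A minimal sketch for the same class:

    @Test
    public void listStatus() throws Exception {
        // FileStatus covers both files and directories, one level deep
        FileStatus[] statuses = fileSystem.listStatus(new Path("/demo"));
        for (FileStatus status : statuses) {
            System.out.println((status.isDirectory() ? "directory" : "file") + "\t" + status.getPath());
        }
    }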

 

2.6 Deleting a file or directory

package com.xk.bigdata.hadoop.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSAPITest {

    FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI("hdfs://bigdatatest02:8020");
        fileSystem = FileSystem.get(uri, conf, "hdfs");
    }

    @After
    public void cleanUp() throws Exception {
        if (null != fileSystem) fileSystem.close();
    }
    
    @Test
    public void delete() throws Exception {
        /**
         * Path f : path to delete
         * boolean recursive : whether to delete recursively
         */
        boolean res = fileSystem.delete(new Path("/demo"), true);
        System.out.println(res);
    }
}

Equivalent to hadoop fs -rm -r / hdfs dfs -rm -r

Full HDFS API test code
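
Unlike hadoop fs -rm on a cluster with fs.trash.interval > 0, FileSystem.delete() bypasses the HDFS trash entirely. If trash-like behavior is wanted, org.apache.hadoop.fs.Trash can be used instead; a hedged sketch, assuming the Configuration built in setUp is kept in a field named conf:

    @Test
    public void moveToTrash() throws Exception {
        // moves /demo into the user's .Trash directory instead of deleting it;
        // requires fs.trash.interval > 0 on the cluster
        boolean res = Trash.moveToAppropriateTrash(fileSystem, new Path("/demo"), conf);
        System.out.println(res);
    }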

3 An HDFS API Case Study

3.1 Requirement

Use the HDFS API to build a rich reName method.

Effect:

/bigdata/hdfs-works/20211001/1.txt
/bigdata/hdfs-works/20211001/2.txt
/bigdata/hdfs-works/20211001/3.txt
/bigdata/hdfs-works/20211002/1.txt
/bigdata/hdfs-works/20211002/2.txt
/bigdata/hdfs-works/20211002/3.txt
==>
/bigdata/hdfs-works/20211001-1.txt
/bigdata/hdfs-works/20211001-2.txt
/bigdata/hdfs-works/20211001-3.txt
/bigdata/hdfs-works/20211002-1.txt
/bigdata/hdfs-works/20211002-2.txt
/bigdata/hdfs-works/20211002-3.txt

3.2 Code

3.2.1 HDFSUtils

package com.xk.bigdata.hadoop.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.net.URI;

public class HDFSUtils {

    FileSystem fileSystem = null;

    /**
     * Initialize the file system
     */
    public void stepUp() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        URI uri = new URI(FinalCode.HDFS_URI);
        fileSystem = FileSystem.get(uri, conf, FinalCode.HDFS_USER_NAME);
    }

    /**
     * Close the file system
     */
    public void cleanUp() throws Exception {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }

    /**
     * Create a directory
     *
     * @param path : directory to create
     */
    public boolean mkdir(String path) throws Exception {
        return fileSystem.mkdirs(new Path(path));
    }

    /**
     * Upload a file from the local file system
     *
     * @param srcPath : local source path
     * @param dstPath : destination path on HDFS
     */
    public void copyFromLocalFile(String srcPath, String dstPath) throws Exception {
        // local source path
        Path src = new Path(srcPath);
        // destination path on HDFS
        Path dst = new Path(dstPath);
        fileSystem.copyFromLocalFile(src, dst);
    }

    /**
     * Download a file from HDFS
     *
     * @param srcPath : source path on HDFS
     * @param dstPath : local destination path
     */
    public void copyToLocalFile(String srcPath, String dstPath) throws Exception {
        // HDFS path
        Path src = new Path(srcPath);
        // local path
        Path dst = new Path(dstPath);
        fileSystem.copyToLocalFile(src, dst);
    }

    /**
     * Rename a file
     *
     * @param oldPath : original path
     * @param newPath : new path
     */
    public boolean reName(String oldPath, String newPath) throws Exception {
        // original file path
        Path src = new Path(oldPath);
        // new file path
        Path dst = new Path(newPath);
        return fileSystem.rename(src, dst);
    }

    /**
     * List files
     *
     * @param pathString : path to list
     * @param recursive  : whether to recurse into subdirectories
     */
    public RemoteIterator<LocatedFileStatus> listFiles(String pathString, Boolean recursive) throws Exception {
        return fileSystem.listFiles(new Path(pathString), recursive);
    }

    /**
     * List files (recursive by default)
     *
     * @param pathString : path to list
     */
    public RemoteIterator<LocatedFileStatus> listFiles(String pathString) throws Exception {
        return fileSystem.listFiles(new Path(pathString), true);
    }

    /**
     * Delete a file or directory
     *
     * @param pathString : path to delete
     * @param recursive  : whether to delete recursively
     */
    public boolean delete(String pathString, boolean recursive) throws Exception {
        return fileSystem.delete(new Path(pathString), recursive);
    }

    /**
     * Delete a file or directory (recursive by default)
     *
     * @param pathString : path to delete
     */
    public boolean delete(String pathString) throws Exception {
        return fileSystem.delete(new Path(pathString), true);
    }

    /**
     * Check whether a path exists
     *
     * @param pathString : path to check
     */
    public boolean isExist(String pathString) throws Exception {
        return fileSystem.exists(new Path(pathString));
    }

}
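
A minimal standalone usage sketch of HDFSUtils (the path matches the requirement in 3.1; any path works):

public static void main(String[] args) throws Exception {
    HDFSUtils hdfsUtils = new HDFSUtils();
    hdfsUtils.stepUp();
    try {
        // create the working directory if it is missing
        if (!hdfsUtils.isExist("/bigdata/hdfs-works")) {
            System.out.println(hdfsUtils.mkdir("/bigdata/hdfs-works"));
        }
    } finally {
        hdfsUtils.cleanUp();
    }
}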

3.2.2 FinalCode

package com.xk.bigdata.hadoop.utils;

public class FinalCode {

    // HDFS param

    public final static String HDFS_URI = "hdfs://bigdatatest02:8020";

    public final static String HDFS_USER_NAME = "hdfs";
}

3.2.3 HDSFReName

package com.xk.bigdata.hadoop.hdfs;

import com.xk.bigdata.hadoop.utils.HDFSUtils;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;

public class HDSFReName {

    /**
     * 1. Recursively list every file path under the given directory
     * 2. Rename each file to its new path
     * 3. Delete the original directory
     */
    public static void reName(String pathString) {
        HDFSUtils hdfsUtils = new HDFSUtils();
        try {
            hdfsUtils.stepUp();
            RemoteIterator<LocatedFileStatus> files = hdfsUtils.listFiles(pathString);
            while (files.hasNext()) {
                LocatedFileStatus file = files.next();
                String filePath = file.getPath().toString();
                String newFilePath = filePath.substring(0, filePath.lastIndexOf("/")) + "-" + filePath.substring(filePath.lastIndexOf("/") + 1);
                boolean res = hdfsUtils.reName(filePath, newFilePath);
                System.out.println(res);
            }
            System.out.println(hdfsUtils.delete(pathString));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                hdfsUtils.cleanUp();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    public static void main(String[] args) {
        String pathString = "/bigdata/hdfs-works/20211001";
        reName(pathString);
    }

}
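
As written, main only processes 20211001, while the requirement in 3.1 lists two dated directories; a minimal sketch of a main that covers both:

    public static void main(String[] args) {
        // one reName call per dated directory from the requirement
        reName("/bigdata/hdfs-works/20211001");
        reName("/bigdata/hdfs-works/20211002");
    }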

reName code link
