Writing and Using the HDFS Java API

First, you need a working Hadoop environment, with Hadoop started.

If you have not set one up yet, see this article:

Hadoop Cluster Setup and Configuration (CSDN blog)

Here I connect from IntelliJ IDEA on Windows to the Hadoop cluster running in a virtual machine.

(1) Installing Hadoop on Windows

The Hadoop installed on Windows must be the same version as the Hadoop running in the virtual machine, and the JDK versions must match as well.

You can check both versions with the following commands:

hadoop version
java -version

(2) HDFS Java API
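Before writing the classes below, the IDEA project needs the Hadoop client libraries and JUnit on its classpath. Assuming a Maven project (the pom with JUnit is mentioned again in step 8), this usually means adding the org.apache.hadoop:hadoop-client dependency, with a version matching your cluster (e.g. 3.1.3, the version referenced later in this article), together with junit; the exact coordinates depend on your setup.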

1. Writing data to HDFS

Create a new class; the reference code is as follows:

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;

public class WriteFileOnHDFS {
    @Test
    public void write1() throws Exception {
        // Create the configuration object and resolve DataNodes by hostname
        Configuration conf = new Configuration();
        conf.set("dfs.client.use.datanode.hostname", "true");
        String uri = "hdfs://192.168.170.80:8020";
        // Connect as user "root", matching the HDFS user on the cluster
        FileSystem fs = FileSystem.get(new URI(uri), conf, "root");
        Path path = new Path(uri + "/lyf/hadoop/lyf0316.txt");
        // Create (or overwrite) the file and write a short string
        FSDataOutputStream out = fs.create(path);
        out.write("hbase".getBytes());
        out.close();
        fs.close();
        System.out.println("File [" + path + "] written successfully!");
    }
}
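To confirm the write, you can check the file on the cluster, for example with hdfs dfs -cat /lyf/hadoop/lyf0316.txt, which should print the string written above.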

2. Reading data from HDFS

Reference code:

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

public class ReadFileOnHDFS {
    @Test
    public void read1() throws Exception {
        // Create the configuration object and resolve DataNodes by hostname
        Configuration conf = new Configuration();
        conf.set("dfs.client.use.datanode.hostname", "true");
        String uri = "hdfs://192.168.170.80:8020";
        // Connect as user "root"
        FileSystem fs = FileSystem.get(new URI(uri), conf, "root");
        Path path = new Path(uri + "/lyf/hadoop/lyf0316.txt");
        // Open the file and print it line by line
        FSDataInputStream in = fs.open(path);
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String nextLine;
        while ((nextLine = br.readLine()) != null) {
            System.out.println(nextLine);
        }
        br.close();
        in.close();
        fs.close();
    }
}
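As an alternative to wrapping the stream in a BufferedReader, Hadoop's IOUtils can copy the opened stream straight to standard output. A minimal sketch reusing the same URI and path as above (the class name ReadFileWithIOUtils is just for illustration):

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.net.URI;

public class ReadFileWithIOUtils {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.client.use.datanode.hostname", "true");
        String uri = "hdfs://192.168.170.80:8020";
        FileSystem fs = FileSystem.get(new URI(uri), conf, "root");
        FSDataInputStream in = fs.open(new Path(uri + "/lyf/hadoop/lyf0316.txt"));
        // Copy the HDFS stream to stdout with a 4 KB buffer; false = do not close stdout
        IOUtils.copyBytes(in, System.out, 4096, false);
        IOUtils.closeStream(in);
        fs.close();
    }
}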

3. Listing all files under an HDFS directory

Reference code:

package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

import java.io.IOException;
import java.text.SimpleDateFormat;

public class HDFSOutputFile {
    /**
     * Recursively list every file under the given HDFS directory.
     */
    public static void lsDir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);

        // The second argument (true) makes the listing recursive
        RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(dirPath, true);
        while (remoteIterator.hasNext()) {
            FileStatus s = remoteIterator.next();
            System.out.println("Path: " + s.getPath().toString());
            System.out.println("Permissions: " + s.getPermission().toString());
            System.out.println("Size: " + s.getLen());
            long timeStamp = s.getModificationTime();
            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String date = format.format(timeStamp);
            System.out.println("Modified: " + date);
            System.out.println();
        }
        fs.close();
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        String remoteDir = "/"; // HDFS directory to list
        try {
            System.out.println("Recursively listing all files under: " + remoteDir);
            HDFSOutputFile.lsDir(conf, remoteDir);
            System.out.println("Done");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
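Note that listFiles(dirPath, true) recurses into subdirectories and only returns files. If you want a single directory level, including the subdirectories themselves, listStatus is the usual choice; a small sketch of a loop that could go inside lsDir instead of the one above (same fs and dirPath):

FileStatus[] statuses = fs.listStatus(dirPath);
for (FileStatus s : statuses) {
    // Directories are included here, unlike with listFiles
    System.out.println((s.isDirectory() ? "[dir]  " : "[file] ") + s.getPath());
}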

4. Uploading a file to HDFS (from Windows to the Linux VM)

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        // Run the client as user "root" so it has permission to write
        System.setProperty("HADOOP_USER_NAME", "root");
        FileSystem fs = FileSystem.get(conf);
        // Copy the local Windows file into the /lyf directory on HDFS
        fs.copyFromLocalFile(new Path("e:/test/test.txt"), new Path("/lyf"));
        fs.close();
    }
}
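copyFromLocalFile also has an overload that controls whether the local source is deleted and whether an existing target is overwritten; a one-line sketch using the same paths as above:

// delSrc = false keeps the local file, overwrite = true replaces an existing copy on HDFS
fs.copyFromLocalFile(false, true, new Path("e:/test/test.txt"), new Path("/lyf"));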

5. Creating a directory

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;

public class CreateNewFolder {
    @Test
    public void createDir01() {
        String HDFS_PATH = "hdfs://192.168.170.80:8020";
        Configuration configuration = new Configuration();
        FileSystem fileSystem = null;

        try {
            fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
            // Create the /linux directory (missing parent directories are created as well)
            fileSystem.mkdirs(new Path("/linux"));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection to the cluster
            if (fileSystem != null) {
                try {
                    fileSystem.close();
                } catch (Exception ignored) {
                }
            }
            System.out.println("--------------end---------------");
        }
    }
}
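mkdirs returns a boolean indicating success. If you also want to set permissions at creation time, FileSystem has an overload that takes an FsPermission; a minimal sketch that would go inside the try block above (the "755" mode is just an example, and the extra import is needed):

import org.apache.hadoop.fs.permission.FsPermission;
// ...
fileSystem.mkdirs(new Path("/linux"), new FsPermission("755"));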

6. Renaming a file

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import java.net.URI;

public class ChangeFileName {
    @Test
    public void renameFile03() {
        String HDFS_PATH = "hdfs://192.168.170.80:8020/";
        Configuration configuration = new Configuration();
        FileSystem fileSystem = null;
        try {
            fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
            Path oldPath = new Path("/linux/linux.txt");
            Path newPath = new Path("/linux/hadoop.txt");
            // rename() returns true on success, false otherwise
            System.out.println(fileSystem.rename(oldPath, newPath));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection to the cluster
            if (fileSystem != null) {
                try {
                    fileSystem.close();
                } catch (Exception ignored) {
                }
            }
        }
    }
}
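Note that rename returns false rather than throwing when the operation cannot be performed, for example when the source path does not exist or the destination already exists, so the printed boolean is worth checking.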

7. Checking whether a path exists

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileExit {
    public static void main(String[] args) {
        try {
            String fileName = "/lyf";
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            FileSystem fs = FileSystem.get(conf);
            // exists() works for both files and directories
            if (fs.exists(new Path(fileName))) {
                System.out.println("Path exists");
            } else {
                System.out.println("Path does not exist");
            }
            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
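exists() is true for both files and directories. If you need to tell them apart, the FileStatus of the path carries that information; a short sketch using the same fs and fileName as above (add the org.apache.hadoop.fs.FileStatus import):

// getFileStatus throws FileNotFoundException for a missing path, so guard it with exists()
FileStatus status = fs.getFileStatus(new Path(fileName));
System.out.println(status.isDirectory() ? fileName + " is a directory" : fileName + " is a file");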

 

8. Getting the block information of a large file

package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

/**
 * Using the Java API to work with the HDFS file system.
 *
 * Because this class lives under the test folder, it is easiest to drive it
 * with the JUnit dependency declared in the pom. JUnit provides two lifecycle
 * methods here: @Before runs before each test and @After runs after each test.
 *
 * Key steps:
 * 1) create a Configuration
 * 2) obtain a FileSystem
 * 3) the rest is ordinary HDFS API calls
 */
public class HDFSApp {

    public static final String HDFS_PATH = "hdfs://192.168.170.80:8020";
    Configuration configuration = null;
    FileSystem fileSystem = null;

    @Before
    public void setup() throws Exception {
        System.out.println("-----setup-----");
        configuration = new Configuration();
        configuration.set("dfs.replication", "1");
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
    }

    /*
     * Print the block information of a file
     */
    @Test
    public void getFileBlockLocations() throws Exception {
        FileStatus fileStatus = fileSystem.getFileStatus(new Path("/lyf/hadoop/hadoop-3.1.3.tar.gz"));
        BlockLocation[] blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());

        for (BlockLocation block : blocks) {
            // Each block may have several replica locations; print the address, offset and length
            for (String name : block.getNames()) {
                System.out.println(name + " : " + block.getOffset() + " : " + block.getLength());
            }
        }
    }

    @After
    public void tearDown() throws Exception {
        System.out.println("-----tearDown-----");
        // Close the connection and release the references
        if (fileSystem != null) {
            fileSystem.close();
        }
        configuration = null;
        fileSystem = null;
    }
}
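block.getNames() returns the DataNode addresses as ip:port strings; if you prefer hostnames, BlockLocation also exposes getHosts(). A small variant of the loop above (same blocks array, inside the same test method):

for (BlockLocation block : blocks) {
    // Hostnames of the DataNodes holding this block, plus its offset and length
    System.out.println(String.join(",", block.getHosts())
            + " : " + block.getOffset() + " : " + block.getLength());
}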

9. Listing all DataNodes in the cluster

package net.army.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class GetNode {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.170.80:8020");
        FileSystem fs = FileSystem.get(conf);
        // Cast to DistributedFileSystem to access the DataNode report
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] infos = dfs.getDataNodeStats();

        for (int i = 0; i < infos.length; i++) {
            System.out.println("DataNode_" + i + "_Name:" + infos[i].getHostName());
        }
        fs.close();
    }
}
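getDataNodeStats() reports all DataNodes; DistributedFileSystem also has an overload that filters by state, for example only live nodes. A one-line sketch that would go in the same main method (the HdfsConstants import comes from org.apache.hadoop.hdfs.protocol):

import org.apache.hadoop.hdfs.protocol.HdfsConstants;
// ...
DatanodeInfo[] live = dfs.getDataNodeStats(HdfsConstants.DatanodeReportType.LIVE);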

 
