大数据实验Day05----通过JavaAPI操作

_TuZero

已于 2024-06-03 08:48:01 修改

阅读量132

点赞数 5

文章标签：大数据

于 2024-05-31 16:35:50 首次发布

本文链接：https://blog.csdn.net/zero_zhengxy/article/details/139339168

版权

1.配置windows的hadoop环境

（1）将D:\hadoop\hadoop3.1.3\bin中的hadoop.dll文件复制到C:\Windows\System32中

（2）配置环境变量

（3）点击winutils.exe，检查是否配置成功。若弹框一闪而过就代表成功，不成功再安装微软运行库(没报错，所以没有看具体怎么操作)。

（4）创建idea项目

maven配置阿里云仓库（修改settings.xml文件）

<?xml version="1.0" encoding="UTF-8"?>
<settings>
  <!-- Local repository location; change this to your own path -->
  <localRepository>D:\360Download\LIULANQI\repository</localRepository>
  <servers></servers>
  <pluginGroups></pluginGroups>
 
  <mirrors>
    <!-- mirrorOf "*" routes requests for every repository through the Aliyun public mirror -->
    <mirror>
      <id>aliyunmaven</id>
      <mirrorOf>*</mirrorOf>
      <name>阿里云公共仓库</name>
      <url>https://maven.aliyun.com/repository/public</url>
    </mirror>
  </mirrors>
 
  <profiles>
    <!-- Global JDK 1.8 configuration -->
    <profile>
      <id>jdk1.8</id>
      <activation>
        <activeByDefault>true</activeByDefault>
        <jdk>1.8</jdk>
      </activation>
      <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <maven.compiler.compilerVersion>1.8</maven.compiler.compilerVersion>
      </properties>
    </profile> 
  </profiles>
 
</settings>

（5）在pom.xml中添加依赖

 <dependencies>

    <!-- JUnit 4: provides the @Before / @Test / @After annotations used by HdfsApiClient -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>

    <!-- Hadoop libraries; all three artifacts are pinned to the same version (3.1.3) -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.1.3</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.1.3</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.1.3</version>
    </dependency>

    </dependencies>

2.在IDEA中操作hdfs文件（windows主机）

（1）创建目录

package com.igeekhome.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * JUnit 4 test class that calls the HDFS Java API.
 *
 * <p>{@link #init()} opens the file system before each test and
 * {@link #close()} releases it afterwards.
 */
public class HdfsApiClient {

    /** File system handle; assigned in {@link #init()}, released in {@link #close()}. */
    FileSystem fileSystem = null;

    /**
     * Opens the file system at {@code hdfs://bigdata02:8020} as user {@code root}.
     *
     * @throws URISyntaxException   if the file system URI cannot be parsed
     * @throws IOException          if the file system cannot be obtained
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        Configuration conf = new Configuration();
        URI clusterUri = new URI("hdfs://bigdata02:8020");
        fileSystem = FileSystem.get(clusterUri, conf, "root");
        System.out.println("hdfs文件系统初始化成功！");
    }

    /**
     * Creates the {@code /hdfs_api} directory and prints whether {@code mkdirs} succeeded.
     *
     * @throws IOException if the HDFS call fails
     */
    @Test
    public void createPath() throws IOException {
        Path target = new Path("/hdfs_api");
        boolean created = fileSystem.mkdirs(target);
        System.out.println(created ? "创建目录成功！" : "创建目录失败！");
    }

    /**
     * Closes the file system handle if one was opened.
     *
     * @throws IOException if closing fails
     */
    @After
    public void close() throws IOException {
        if (fileSystem == null) {
            // init() never assigned a handle, so there is nothing to release.
            return;
        }
        fileSystem.close();
        System.out.println("hdfs文件系统已关闭！");
    }
}

（2）删除hdfs目录

//删除hdfs目录
    /**
     * Deletes the {@code /hdfs_api} directory without recursion and prints the outcome.
     *
     * <p>The path is absolute on purpose: a relative {@code "hdfs_api"} is resolved
     * against the file system's working directory (the user's home directory by
     * default on HDFS), not the root where {@code createPath()} creates the directory,
     * so the existence check would always fail.
     *
     * @throws IOException if the HDFS call fails; with {@code recursive == false}
     *                     this includes the case where the directory is not empty
     */
    @Test
    public void deletePath() throws IOException {

        Path deletePath = new Path("/hdfs_api");

        // Nothing to delete: report it and stop.
        if (!fileSystem.exists(deletePath)) {
            System.out.println("要删除的目录在hdfs上不存在");
            return;
        }

        // Second argument false = do not delete recursively.
        boolean result = fileSystem.delete(deletePath, false);
        System.out.println(result ? "删除目录成功" : "删除目录失败");
    }

递归删除（异常直接抛出）

 //删除hdfs目录
    /**
     * Recursively deletes the {@code /hdfs_api} directory and prints the outcome.
     *
     * <p>The path is absolute on purpose: a relative {@code "hdfs_api"} is resolved
     * against the file system's working directory (the user's home directory by
     * default on HDFS), not the root where {@code createPath()} creates the directory,
     * so the existence check would always fail.
     *
     * @throws IOException if the HDFS call fails
     */
    @Test
    public void deletePath() throws IOException {

        Path deletePath = new Path("/hdfs_api");

        // Nothing to delete: report it and stop.
        if (!fileSystem.exists(deletePath)) {
            System.out.println("要删除的目录在hdfs上不存在");
            return;
        }

        // delete(Path f, boolean recursive): the first argument is the path to remove,
        // the second selects recursive deletion of the directory's contents.
        boolean result = fileSystem.delete(deletePath, true);
        System.out.println(result ? "删除目录成功" : "删除目录失败");
    }

（3）在hdfs上创建一个文件，并写入指定的内容

//在hdfs上创建一个文件，并写入指定的内容
    /**
     * Creates {@code /api_file.txt} on HDFS and writes a fixed line of text to it.
     *
     * <p>The output stream is managed by try-with-resources so it is closed even
     * when {@code write} or {@code flush} throws.
     *
     * @throws IOException if the file cannot be created or written
     */
    @Test
    public void createHdfsFile() throws IOException {

        // Content to write into the file.
        String line = "hello bigdata";

        try (FSDataOutputStream fsDataOutputStream = fileSystem.create(new Path("/api_file.txt"))) {
            // Encode explicitly as UTF-8 rather than relying on the platform charset.
            fsDataOutputStream.write(line.getBytes(StandardCharsets.UTF_8));
            fsDataOutputStream.flush();
        }
    }

（4）对hdfs上的文件修改其路径和名称

//对hdfs上的文件修改其路径和名称
    /**
     * Moves {@code /api_file.txt} to {@code /hdfs_api/api_file_new.txt}, changing both
     * its directory and its name, and prints whether the rename succeeded.
     *
     * <p>{@code rename} returns a boolean result; it is reported here rather than
     * discarded so that a failed move does not go unnoticed.
     *
     * @throws IOException if the HDFS call fails
     */
    @Test
    public void moveHdfsFile() throws IOException {

        // Current location of the file.
        Path src = new Path("/api_file.txt");
        // New location and name of the file.
        Path dst = new Path("/hdfs_api/api_file_new.txt");

        boolean result = fileSystem.rename(src, dst);
        System.out.println(result ? "文件移动成功" : "文件移动失败");
    }

（5）读取hdfs上的文件内容

  //读取hdfs上的文件内容
    /**
     * Reads {@code /hdfs_api/api_file_new.txt} from HDFS and copies its bytes to standard output.
     *
     * <p>The input stream is managed by try-with-resources so it is always closed;
     * the original never closed it.
     *
     * @throws IOException if the file cannot be opened or read
     */
    @Test
    public void readHdfsFile() throws IOException {

        try (FSDataInputStream fsDataInputStream = fileSystem.open(new Path("/hdfs_api/api_file_new.txt"))) {
            // System.out is a PrintStream, which is an indirect subclass of OutputStream.
            // The last argument stays false so copyBytes does not close System.out;
            // the input stream is closed by the enclosing try-with-resources instead.
            IOUtils.copyBytes(fsDataInputStream, System.out, 2048, false);
        }

        // Manual line break after the file content.
        System.out.println("\n");

    }

（6）从本地上传文件到hdfs上

   //从本地上传文件到hdfs上
    /**
     * Uploads the local file {@code D:\大数据实践\words.txt} to {@code /hdfs_api} on HDFS.
     *
     * <p>Annotated with {@code @Test} so JUnit actually runs it. The destination is
     * absolute: a relative {@code "hdfs_api"} would be resolved against the file
     * system's working directory (the user's home directory by default on HDFS)
     * instead of the {@code /hdfs_api} directory used by the other tests.
     *
     * @throws IOException if the local file is missing or the upload fails
     */
    @Test
    public void uploadFile() throws IOException {

        // Local path of the file (words.txt must exist).
        Path src = new Path("D:\\大数据实践\\words.txt");
        // Destination on HDFS.
        Path dst = new Path("/hdfs_api");

        // Whether the local file is deleted after a successful upload.
        boolean delSrc = true;

        // Whether an existing destination file is overwritten.
        boolean overwrite = true;

        fileSystem.copyFromLocalFile(delSrc, overwrite, src, dst);
    }

--查看words.txt的内容
hadoop fs -cat /hdfs_api/w*.txt

hadoop fs -cat /hdfs_api/*

（7）从hdfs上下载文件到本地

//从hdfs上下载文件到本地
    /**
     * Downloads {@code /hdfs_api/api_file_new.txt} from HDFS to
     * {@code D:\大数据实践\api_file_new.txt}, leaving the HDFS copy in place.
     *
     * @throws IOException if the download fails
     */
    @Test
    public void downloadFile() throws IOException {
        Path remoteFile = new Path("/hdfs_api/api_file_new.txt");
        Path localTarget = new Path("D:\\大数据实践\\api_file_new.txt");

        // 1st argument (delSrc) false: keep the file on HDFS after downloading.
        // 4th argument (useRawLocalFileSystem) true: no .crc checksum file is written
        // next to the download; false would produce one.
        fileSystem.copyToLocalFile(false, remoteFile, localTarget, true);
    }

（8）查看hdfs上的文件信息

//查看hdfs上的文件信息
    /**
     * Walks every file under {@code /} recursively and prints its path, permission,
     * owner, group, replication factor and block size (in whole MB).
     *
     * @throws IOException if listing the files fails
     */
    @Test
    public void queryHdfsFileInfo() throws IOException {
        // Start at the root; the second argument enables recursive listing.
        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);

        while (files.hasNext()) {
            LocatedFileStatus status = files.next();

            System.out.println("文件的路径是：" + status.getPath());
            System.out.println("文件的权限是：" + status.getPermission());
            System.out.println("文件的所属用户是：" + status.getOwner());
            System.out.println("文件的所属用户的用户组是：" + status.getGroup());
            System.out.println("文件的副本数是：" + status.getReplication());

            // Integer division: bytes -> KB -> MB, truncating any remainder.
            long blockSizeMb = status.getBlockSize() / 1024 / 1024;
            System.out.println("文件的块大小是：" + blockSizeMb + "MB");

            System.out.println("--------------------------");
        }
    }

（9）总代码

package com.igeekhome.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;

/**
 * JUnit 4 test class that calls the HDFS Java API.
 *
 * <p>{@link #init()} opens the file system before each test and
 * {@link #close()} releases it afterwards. All HDFS paths are absolute so that
 * every test works on the same {@code /hdfs_api} directory; a relative path
 * would be resolved against the file system's working directory (the user's
 * home directory by default on HDFS).
 */
public class HdfsApiClient {

    /** File system handle; assigned in {@link #init()}, released in {@link #close()}. */
    FileSystem fileSystem = null;

    /**
     * Opens the file system at {@code hdfs://bigdata02:8020} as user {@code root}.
     *
     * @throws URISyntaxException   if the file system URI cannot be parsed
     * @throws IOException          if the file system cannot be obtained
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {

        // Configuration object for the file system.
        Configuration conf = new Configuration();

        // Obtain the file system handle.
        fileSystem = FileSystem.get(new URI("hdfs://bigdata02:8020"), conf, "root");

        System.out.println("hdfs文件系统初始化成功！");
    }

    /**
     * Creates the {@code /hdfs_api} directory and prints whether {@code mkdirs} succeeded.
     *
     * @throws IOException if the HDFS call fails
     */
    @Test
    public void createPath() throws IOException {

        boolean result = fileSystem.mkdirs(new Path("/hdfs_api"));
        System.out.println(result ? "创建目录成功！" : "创建目录失败！");
    }

    /**
     * Recursively deletes the {@code /hdfs_api} directory and prints the outcome.
     *
     * <p>The path is absolute so that it names the directory made by
     * {@link #createPath()}; the original relative {@code "hdfs_api"} pointed
     * elsewhere and the existence check always failed.
     *
     * @throws IOException if the HDFS call fails
     */
    @Test
    public void deletePath() throws IOException {

        Path deletePath = new Path("/hdfs_api");

        // Nothing to delete: report it and stop.
        if (!fileSystem.exists(deletePath)) {
            System.out.println("要删除的目录在hdfs上不存在");
            return;
        }

        // delete(Path f, boolean recursive): the first argument is the path to remove,
        // the second selects recursive deletion of the directory's contents.
        boolean result = fileSystem.delete(deletePath, true);
        System.out.println(result ? "删除目录成功" : "删除目录失败");
    }

    /**
     * Creates {@code /api_file.txt} on HDFS and writes a fixed line of text to it.
     *
     * <p>The output stream is managed by try-with-resources so it is closed even
     * when {@code write} or {@code flush} throws.
     *
     * @throws IOException if the file cannot be created or written
     */
    @Test
    public void createHdfsFile() throws IOException {

        // Content to write into the file.
        String line = "hello bigdata";

        try (FSDataOutputStream fsDataOutputStream = fileSystem.create(new Path("/api_file.txt"))) {
            // Encode explicitly as UTF-8 rather than relying on the platform charset.
            fsDataOutputStream.write(line.getBytes(StandardCharsets.UTF_8));
            fsDataOutputStream.flush();
        }
    }

    /**
     * Moves {@code /api_file.txt} to {@code /hdfs_api/api_file_new.txt}, changing both
     * its directory and its name, and prints whether the rename succeeded.
     *
     * <p>{@code rename} returns a boolean result; it is reported here rather than
     * discarded so that a failed move does not go unnoticed.
     *
     * @throws IOException if the HDFS call fails
     */
    @Test
    public void moveHdfsFile() throws IOException {

        // Current location of the file.
        Path src = new Path("/api_file.txt");
        // New location and name of the file.
        Path dst = new Path("/hdfs_api/api_file_new.txt");

        boolean result = fileSystem.rename(src, dst);
        System.out.println(result ? "文件移动成功" : "文件移动失败");
    }

    /**
     * Reads {@code /hdfs_api/api_file_new.txt} from HDFS and copies its bytes to standard output.
     *
     * <p>The input stream is managed by try-with-resources so it is always closed;
     * the original never closed it.
     *
     * @throws IOException if the file cannot be opened or read
     */
    @Test
    public void readHdfsFile() throws IOException {

        try (FSDataInputStream fsDataInputStream = fileSystem.open(new Path("/hdfs_api/api_file_new.txt"))) {
            // System.out is a PrintStream, which is an indirect subclass of OutputStream.
            // The last argument stays false so copyBytes does not close System.out;
            // the input stream is closed by the enclosing try-with-resources instead.
            IOUtils.copyBytes(fsDataInputStream, System.out, 2048, false);
        }

        // Manual line break after the file content.
        System.out.println("\n");

    }


    /**
     * Uploads the local file {@code D:\大数据实践\words.txt} to {@code /hdfs_api} on HDFS.
     *
     * <p>The destination is absolute so the file lands in the same
     * {@code /hdfs_api} directory the other tests use.
     *
     * @throws IOException if the local file is missing or the upload fails
     */
    @Test
    public void uploadFile() throws IOException {

        // Local path of the file.
        Path src = new Path("D:\\大数据实践\\words.txt");
        // Destination on HDFS.
        Path dst = new Path("/hdfs_api");

        // Whether the local file is deleted after a successful upload.
        boolean delSrc = true;

        // Whether an existing destination file is overwritten.
        boolean overwrite = true;

        fileSystem.copyFromLocalFile(delSrc, overwrite, src, dst);
    }

    /**
     * Downloads {@code /hdfs_api/api_file_new.txt} from HDFS to
     * {@code D:\大数据实践\api_file_new.txt}, leaving the HDFS copy in place.
     *
     * @throws IOException if the download fails
     */
    @Test
    public void downloadFile() throws IOException {

        // File on HDFS.
        Path src = new Path("/hdfs_api/api_file_new.txt");

        // Local download target.
        Path dst = new Path("D:\\大数据实践\\api_file_new.txt");

        // Whether the HDFS original is deleted after the download.
        boolean delSrc = false;

        // true:  no .crc checksum file is written next to the download
        // false: a .crc checksum file is written next to the download
        boolean useRawLocalFileSystem = true;

        fileSystem.copyToLocalFile(delSrc, src, dst, useRawLocalFileSystem);
    }

    /**
     * Walks every file under {@code /} recursively and prints its path, permission,
     * owner, group, replication factor and block size (in whole MB).
     *
     * @throws IOException if listing the files fails
     */
    @Test
    public void queryHdfsFileInfo() throws IOException {
        // Start at the root; the second argument enables recursive listing.
        RemoteIterator<LocatedFileStatus> listIterator = fileSystem.listFiles(new Path("/"), true);

        while (listIterator.hasNext()) {

            LocatedFileStatus fileStatus = listIterator.next();

            System.out.println("文件的路径是：" + fileStatus.getPath());
            System.out.println("文件的权限是：" + fileStatus.getPermission());
            System.out.println("文件的所属用户是：" + fileStatus.getOwner());
            System.out.println("文件的所属用户的用户组是：" + fileStatus.getGroup());
            System.out.println("文件的副本数是：" + fileStatus.getReplication());

            // Integer division: bytes -> KB -> MB, truncating any remainder.
            long blockSize = fileStatus.getBlockSize();
            System.out.println("文件的块大小是：" + blockSize / 1024 / 1024 + "MB");

            System.out.println("--------------------------");
        }
    }

    /**
     * Closes the file system handle if one was opened.
     *
     * @throws IOException if closing fails
     */
    @After
    public void close() throws IOException {

        if (fileSystem != null) {
            fileSystem.close();
            System.out.println("hdfs文件系统已关闭！");
        }
    }
}

_TuZero

关注

5
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
大数据实验Day05----通过JavaAPI操作

（3）点击winutils.exe，检查是否配置成功。（1）将D:\hadoop\hadoop3.1.3\bin中的hadoop.dll文件复制到C:\Windows\System32中。2.在IDEA中操作hdfs文件（windows主机）（3）在hdfs上创建一个文件，并写入指定的内容。（4）对hdfs上的文件修改其路径和名称。（5）在pom.xml中添加依赖。（7）从hdfs上下载文件到本地。（5）读取hdfs上的文件内容。（8）查看hdfs上的文件信息。（2）删除hdfs目录。
复制链接

扫一扫