HDFS Java API

61 篇文章 2 订阅
13 篇文章 2 订阅

HDFS Java API

package cn.lhz.util.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;

import java.io.IOException;
import java.net.URI;

/**
 * HDFS 工具类
 *
 * @author 李昊哲
 * @version 1.0.0
 */
public class HdfsUtil {

  /** Utility class — not meant to be instantiated. */
  private HdfsUtil() {
  }

  /**
   * Builds and initializes a {@link DistributedFileSystem} client against the
   * HA nameservice declared in the classpath configuration
   * ({@code core-site.xml} / {@code hdfs-site.xml}).
   *
   * @return an initialized distributed file system; caller must close it
   *         (e.g. via {@link #close(DistributedFileSystem)})
   * @throws IOException if client initialization fails
   */
  public static DistributedFileSystem getDfs() throws IOException {
    // Identify the Java client to Hadoop as user "root".
    System.setProperty("HADOOP_USER_NAME", "root");
    // Picks up *-site.xml files found on the classpath.
    Configuration conf = new Configuration();
    DistributedFileSystem dfs = new DistributedFileSystem();
    // Logical nameservice id, e.g. "lihaozhe" — NOT a hostname.
    String nameService = conf.get("dfs.nameservices");
    // An HA logical URI must not carry a port; the active NameNode is
    // resolved through the configured failover proxy provider.
    String hdfsRPCUrl = "hdfs://" + nameService;
    // Initialize the distributed file system with that URI.
    dfs.initialize(URI.create(hdfsRPCUrl), conf);
    return dfs;
  }

  /**
   * Closes an HDFS connection, tolerating a {@code null} argument.
   *
   * @param dfs the distributed file system to close; may be {@code null}
   * @throws IOException if closing the underlying connection fails
   */
  public static void close(DistributedFileSystem dfs) throws IOException {
    if (dfs != null) {
      dfs.close();
    }
  }
}

package cn.lhz.hadoop;

import cn.lhz.util.hadoop.HdfsUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * HDFS 常用操作
 *
 * @author 李昊哲
 * @version 1.0.0
 */
@Slf4j
public class HdfsTest {

  /**
   * Connects to a single NameNode directly by host:port and checks whether
   * {@code /lihaozhe} exists. Suitable for standalone / pseudo-distributed
   * clusters (no HA resolution involved).
   */
  @Test
  public void test01() {
    // hdfs haadmin -getServiceState nn1
    // hdfs haadmin -getServiceState nn2
    try {
      // Direct NameNode address — only valid for non-HA deployments.
      URI uri = new URI("hdfs://spark01:8020");
      Configuration conf = new Configuration();
      // Hadoop user the client acts as.
      String user = "root";
      // try-with-resources guarantees the connection is released even if
      // exists() throws.
      try (FileSystem fs = FileSystem.get(uri, conf, user)) {
        boolean exists = fs.exists(new Path("/lihaozhe"));
        log.info("exists:{}", exists);
      }
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers can observe the interruption.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (URISyntaxException | IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Connects to an HA nameservice with all settings supplied in code
   * (no *-site.xml needed) and checks whether {@code /lihaozhe} exists.
   */
  @Test
  public void test02() {
    try {
      String nameservices = "lihaozhe";
      String[] namenodesAddr = {"spark01:8020", "spark02:8020"};
      String[] namenodes = {"nn1", "nn2"};
      Configuration conf = new Configuration();
      conf.set("fs.defaultFS", "hdfs://" + nameservices);
      conf.set("dfs.nameservices", nameservices);
      // Both NameNode ids must be registered as ONE comma-separated value.
      // (Passing them as two arguments hits the (name, value, source)
      // overload of Configuration.set and silently drops nn2.)
      conf.set("dfs.ha.namenodes." + nameservices, String.join(",", namenodes));
      conf.set("dfs.namenode.rpc-address." + nameservices + "." + namenodes[0], namenodesAddr[0]);
      conf.set("dfs.namenode.rpc-address." + nameservices + "." + namenodes[1], namenodesAddr[1]);
      conf.set("dfs.client.failover.proxy.provider." + nameservices, "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
      // HA logical URI — no port; the failover proxy provider picks the
      // active NameNode from the rpc-address entries above.
      String hdfsRPCUrl = "hdfs://" + nameservices;
      // Identify the Java client to Hadoop as user "root".
      System.setProperty("HADOOP_USER_NAME", "root");
      DistributedFileSystem dfs = new DistributedFileSystem();
      // Initialize the distributed file system with the logical URI.
      dfs.initialize(URI.create(hdfsRPCUrl), conf);
      try {
        boolean exists = dfs.exists(new Path("/lihaozhe"));
        log.info("exists:{}", exists);
      } finally {
        // Release the connection even when exists() throws.
        dfs.close();
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Connects to the HA nameservice using configuration files found on the
   * classpath and checks whether {@code /lihaozhe} exists.
   */
  @Test
  public void test03() {
    try {
      // Identify the Java client to Hadoop as user "root".
      System.setProperty("HADOOP_USER_NAME", "root");
      // Loads *-site.xml from the classpath.
      Configuration conf = new Configuration();
      DistributedFileSystem dfs = new DistributedFileSystem();
      // Logical nameservice id from hdfs-site.xml.
      String nameService = conf.get("dfs.nameservices");
      // HA logical URI must not carry a port.
      String hdfsRPCUrl = "hdfs://" + nameService;
      dfs.initialize(URI.create(hdfsRPCUrl), conf);
      try {
        boolean exists = dfs.exists(new Path("/lihaozhe"));
        log.info("exists:{}", exists);
      } finally {
        dfs.close();
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /** Same existence check as test03, but via the {@link HdfsUtil} helper. */
  @Test
  public void test04() {
    try {
      DistributedFileSystem dfs = HdfsUtil.getDfs();
      try {
        boolean exists = dfs.exists(new Path("/lihaozhe"));
        log.info("exists:{}", exists);
      } finally {
        HdfsUtil.close(dfs);
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /** Lists the direct children of the HDFS root directory. */
  @Test
  public void test05() throws IOException {
    DistributedFileSystem dfs = HdfsUtil.getDfs();
    try {
      FileStatus[] listStatus = dfs.listStatus(new Path("/"));
      for (FileStatus fileStatus : listStatus) {
        log.info("fileStatus:{}", fileStatus.getPath());
      }
    } finally {
      HdfsUtil.close(dfs);
    }
  }

  /** Creates directory {@code /xiaoshuo} if it does not exist yet. */
  @Test
  public void test06() throws IOException {
    DistributedFileSystem dfs = HdfsUtil.getDfs();
    try {
      Path path = new Path("/xiaoshuo");
      if (!dfs.exists(path)) {
        System.out.println(dfs.mkdirs(path) ? "创建成功" : "创建失败");
      } else {
        System.out.println(path.getName() + "已经存在无需重复创建");
      }
    } finally {
      HdfsUtil.close(dfs);
    }
  }

  /** Renames {@code /xiaoshuo} to {@code /小说} if the source exists. */
  @Test
  public void test07() throws IOException {
    DistributedFileSystem dfs = HdfsUtil.getDfs();
    try {
      Path src = new Path("/xiaoshuo");
      Path dst = new Path("/小说");
      if (dfs.exists(src)) {
        System.out.println(dfs.rename(src, dst) ? "修改成功" : "修改失败");
      } else {
        System.out.println(src.getName() + "文件不存在");
      }
    } finally {
      HdfsUtil.close(dfs);
    }
  }

  /** Recursively deletes {@code /小说} if it exists. */
  @Test
  public void test08() throws IOException {
    DistributedFileSystem dfs = HdfsUtil.getDfs();
    try {
      Path path = new Path("/小说");
      if (dfs.exists(path)) {
        // "true" → recursive delete.
        System.out.println(dfs.delete(path, true) ? "删除成功" : "删除失败");
      } else {
        System.out.println(path.getName() + "文件不存在");
      }
    } finally {
      HdfsUtil.close(dfs);
    }
  }

  /**
   * Uploads local file {@code 三国演义.txt} to {@code /小说/三国演义.txt},
   * creating the parent directory first when necessary.
   */
  @Test
  public void test09() throws IOException {
    DistributedFileSystem dfs = HdfsUtil.getDfs();
    try {
      Path path = new Path("/小说");
      Path src = new Path("三国演义.txt");
      Path dst = new Path("/小说/三国演义.txt");
      if (dfs.exists(path) || dfs.mkdirs(path)) {
        // Parent directory present (or just created) — upload.
        dfs.copyFromLocalFile(src, dst);
      }
      System.out.println(dfs.exists(dst) ? "上传成功" : "上传失败");
    } finally {
      HdfsUtil.close(dfs);
    }
  }

  /** Downloads {@code /小说/三国演义.txt} to the local file {@code 三国.txt}. */
  @Test
  public void test10() throws IOException {
    DistributedFileSystem dfs = HdfsUtil.getDfs();
    try {
      Path src = new Path("/小说/三国演义.txt");
      Path dst = new Path("三国.txt");
      if (dfs.exists(src)) {
        dfs.copyToLocalFile(src, dst);
        File file = new File(dst.getName());
        System.out.println(file.exists() ? "下载成功" : "下载失败");
      } else {
        System.out.println("文件不存在无法下载");
      }
    } finally {
      HdfsUtil.close(dfs);
    }
  }
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>cn.lhz</groupId>
  <artifactId>hadoop</artifactId>
  <version>1.0.0</version>

  <properties>
    <jdk.version>8</jdk.version>
    <!-- 公共配置 -->
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <maven.compiler.compilerVersion>8</maven.compiler.compilerVersion>
    <maven.compiler.encoding>utf-8</maven.compiler.encoding>
    <project.build.sourceEncoding>utf-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    <maven.test.failure.ignore>true</maven.test.failure.ignore>
    <maven.test.skip>true</maven.test.skip>

    <junit.version>5.10.2</junit.version>
    <lombok.version>1.18.32</lombok.version>
    <commons-lang3.version>3.14.0</commons-lang3.version>
    <commons-io.version>2.16.1</commons-io.version>
    <jackson.version>2.17.1</jackson.version>
    <slf4j.version>2.0.13</slf4j.version>
    <hadoop.version>3.3.6</hadoop.version>
    <guava.version>33.0.0-jre</guava.version>
    <mysql.version>8.4.0</mysql.version>
    <protobuf.version>4.27.1</protobuf.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.junit.jupiter</groupId>
      <artifactId>junit-jupiter-engine</artifactId>
      <version>${junit.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.junit.jupiter</groupId>
      <artifactId>junit-jupiter-api</artifactId>
      <version>${junit.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.projectlombok</groupId>
      <artifactId>lombok</artifactId>
      <version>${lombok.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>${commons-lang3.version}</version>
    </dependency>
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>${commons-io.version}</version>
    </dependency>
    <!--jackson-->
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-core</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-annotations</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.datatype</groupId>
      <artifactId>jackson-datatype-jsr310</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>${slf4j.version}</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-simple</artifactId>
      <version>${slf4j.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>${guava.version}</version>
    </dependency>
    <dependency>
      <groupId>com.mysql</groupId>
      <artifactId>mysql-connector-j</artifactId>
      <version>${mysql.version}</version>
    </dependency>
    <dependency>
      <groupId>com.google.protobuf</groupId>
      <artifactId>protobuf-java</artifactId>
      <version>${protobuf.version}</version>
    </dependency>
  </dependencies>
  <build>
    <finalName>${project.artifactId}</finalName>
    <!--<outputDirectory>../package</outputDirectory>-->
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.13.0</version>
        <configuration>
          <!-- 设置编译字符编码 -->
          <encoding>utf-8</encoding>
          <!-- 设置编译jdk版本 -->
          <source>${jdk.version}</source>
          <target>${jdk.version}</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-clean-plugin</artifactId>
        <version>3.3.2</version>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-resources-plugin</artifactId>
        <version>3.3.1</version>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>3.4.1</version>
      </plugin>
      <!-- 编译级别 -->
      <!-- 打包的时候跳过测试junit begin -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>3.2.5</version>
        <configuration>
          <skip>true</skip>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

李昊哲小课

桃李不言下自成蹊

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值