Hadoop Learning Notes Series: 03 - Common HDFS Java API Operations

Overview

This article describes how to use the Java API on Windows to operate the HDFS file system of a Hadoop cluster installed in a remote Linux environment.

1. Add the pom.xml dependencies

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs-client</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.3.1</version>
    </dependency>

</dependencies>

HDFS Java API operations

Creating the HDFS FileSystem environment

  • Method 1: use the cluster configuration files

Copy the configuration files listed below from the Hadoop cluster into the project's resources directory, then use the following code to connect to the remote HDFS file system.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

Configuration conf = new Configuration();
conf.addResource("core-site.xml");
conf.addResource("hdfs-site.xml");
conf.addResource("mapred-site.xml");
conf.addResource("yarn-site.xml");
FileSystem fs = FileSystem.get(conf);
  • Method 2: set the configuration in code
Configuration conf = new Configuration();

// Point the client at the active NameNode (do not use a standby NameNode)
conf.set("fs.defaultFS", "hdfs://192.168.0.115:8020");

// Set the user that operates HDFS via a system property
System.setProperty("HADOOP_USER_NAME", "root");

// Make the HDFS client access datanodes by hostname
conf.set("dfs.client.use.datanode.hostname", "true");

// Replication factor for files written by this client
conf.set("dfs.replication", "2");

// Block size for files written by this client (dfs.block.size is the deprecated name of dfs.blocksize)
conf.set("dfs.block.size", "64m");

// Get the FileSystem only after all configuration values are set;
// values set after FileSystem.get(conf) are not picked up
FileSystem fs = FileSystem.get(conf);

TIPS: configuration priority, from highest to lowest (a quick check is sketched below):

  • 1. Values set in the client code
  • 2. User-defined configuration files on the classpath
  • 3. The server-side default configuration
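
A minimal sketch of that precedence check; it assumes an hdfs-site.xml on the classpath that sets dfs.replication (the value 1 used for the override is arbitrary):

import org.apache.hadoop.conf.Configuration;

Configuration conf = new Configuration();
conf.addResource("hdfs-site.xml");
// Value coming from the XML file on the classpath (assumed to be set there)
System.out.println(conf.get("dfs.replication"));

// Override in code: this value wins over the classpath file and the server defaults
conf.set("dfs.replication", "1");
System.out.println(conf.get("dfs.replication"));   // prints 1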

Once the FileSystem environment above is created, you can use the fs object to perform the operations below.
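
Before running any operations, a quick sanity check (minimal sketch) confirms which file system the client actually resolved:

// Print the URI of the connected file system and confirm the root path is reachable
System.out.println("Connected to: " + fs.getUri());
System.out.println("Root exists: " + fs.exists(new Path("/")));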

Create a directory

// Create a directory
boolean flag = fs.mkdirs(new Path("/root/data"));
System.out.println(flag);

Check whether a file or directory exists

boolean isExists = fs.exists(new Path("/root/data/student2.txt"));
System.out.println(isExists);

Create a file

// create() returns an FSDataOutputStream; close it if you do not write to it right away
fs.create(new Path("/root/data/student2.txt")).close();

Upload a local file to HDFS

// Upload a file
String srcFilePath = "D:\\javaworkspace\\BigData\\Hadoop\\HadoopApp\\HdfsApp\\src\\main\\java\\com\\hjt\\yxh\\hw\\HdfsApiTest.java";
String remoteFilePath = "/root/data/";
fs.copyFromLocalFile(new Path(srcFilePath),new Path(remoteFilePath));

Download a file to the local machine

// Download a file or directory
fs.copyToLocalFile(new Path("/root/data"),new Path("D:\\javaworkspace\\BigData\\Hadoop\\MapReduceLearn\\src\\main\\resources\\"));

Delete a file or directory

// Delete a path; when it is a directory, the second argument controls recursive deletion
fs.delete(new Path("/root/data/"),true);

List files

// List files recursively and print their status
RemoteIterator<LocatedFileStatus> fileList = fs.listFiles(new Path("/root/data/"), true);
while (fileList.hasNext()){
    System.out.println(fileList.next().toString());
}
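
If you need more than the default toString() output, LocatedFileStatus exposes the path, length, replication and block locations directly. A minimal sketch (same /root/data path as above):

RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/root/data/"), true);
while (it.hasNext()){
    LocatedFileStatus status = it.next();
    System.out.println(status.getPath() + "  " + status.getLen() + " bytes, replication=" + status.getReplication());
    // Each block of the file and the datanodes that hold a replica of it
    for (BlockLocation block : status.getBlockLocations()){
        System.out.println("  offset=" + block.getOffset() + " length=" + block.getLength()
                + " hosts=" + String.join(",", block.getHosts()));
    }
}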

Write to an HDFS file with streams

// Write to an HDFS file using streams (append requires the file to exist already)
FSDataOutputStream fsDataOutputStream = fs.append(new Path("/root/data/student2.txt"));
FileInputStream fileinputStream = new FileInputStream("D:\\javaworkspace\\BigData\\Hadoop\\HadoopApp\\HdfsApp\\src\\main\\java\\com\\hjt\\yxh\\hw\\HdfsApiTest.java");
byte[] buffer = new byte[1024*1024];
int read = 0;
while ((read = fileinputStream.read(buffer)) != -1){
    fsDataOutputStream.write(buffer,0,read);
}

// Alternatively, replace the loop above with IOUtils
// (do not run both: the input stream is already exhausted after the loop):
// InputStream inputStream = new BufferedInputStream(fileinputStream);
// IOUtils.copyBytes(inputStream, fsDataOutputStream, conf);

// Close the streams
fileinputStream.close();
fsDataOutputStream.close();

Download a file with streams

// Download a file using streams
FileOutputStream fileOutputStream = new FileOutputStream("./test.txt");
FSDataInputStream fsDataInputStream = fs.open(new Path("/root/data/student2.txt"));
byte[] buffer2 = new byte[1024*1024];
int read2 = 0;
while ((read2 = fsDataInputStream.read(buffer2)) != -1){
    fileOutputStream.write(buffer2,0,read2);
}

// Alternatively, replace the loop above with IOUtils
// (do not run both: the HDFS input stream is already exhausted after the loop):
// OutputStream outputStream = new BufferedOutputStream(fileOutputStream);
// IOUtils.copyBytes(fsDataInputStream, outputStream, conf);

fileOutputStream.close();
fsDataInputStream.close();

Complete code example

package com.hjt.yxh.hw;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import java.io.*;

public class HdfsApiTest {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        conf.addResource("mapred-site.xml");
        conf.addResource("yarn-site.xml");
//        conf.set("fs.defaultFS","hdfs://192.168.0.115:8020");
        System.setProperty("HADOOP_USER_NAME","root");
//        conf.set("dfs.client.use.datanode.hostname", "true");
        FileSystem fs = FileSystem.get(conf);

        // Create a directory
        boolean flag = fs.mkdirs(new Path("/root/data"));
        System.out.println(flag);

        // Check whether the file exists; create it if it does not
        boolean isExists = fs.exists(new Path("/root/data/student2.txt"));
        System.out.println(isExists);
        if (!isExists){
            // create() returns a stream; close it so the file is not left open
            fs.create(new Path("/root/data/student2.txt")).close();
        }

        // Inspect file metadata
        FileStatus fileStatus = fs.getFileStatus(new Path("/root/data/student2.txt"));
        System.out.println(fileStatus.toString());

        // Download the directory
        fs.copyToLocalFile(new Path("/root/data/"),new Path("D:\\javaworkspace\\BigData\\Hadoop\\MapReduceLearn\\src\\main\\resources\\"));

        // Upload a file
        String srcFilePath = "D:\\javaworkspace\\BigData\\Hadoop\\HadoopApp\\HdfsApp\\src\\main\\java\\com\\hjt\\yxh\\hw\\HdfsApiTest.java";
        String remoteFilePath = "/root/data/";
        fs.copyFromLocalFile(new Path(srcFilePath),new Path(remoteFilePath));

        // List files recursively
        RemoteIterator<LocatedFileStatus> fileList = fs.listFiles(new Path("/root/data/"), true);
        while (fileList.hasNext()){
            System.out.println(fileList.next().toString());
        }

        // Write to an HDFS file using streams (append requires the file to exist already)
        FSDataOutputStream fsDataOutputStream = fs.append(new Path("/root/data/student2.txt"));
        FileInputStream fileinputStream = new FileInputStream(srcFilePath);
        byte[] buffer = new byte[1024*1024];
        int read = 0;
        while ((read = fileinputStream.read(buffer)) != -1){
            fsDataOutputStream.write(buffer,0,read);
        }
        // Alternatively, replace the loop above with IOUtils
        // (do not run both: the input stream is already exhausted after the loop):
        // IOUtils.copyBytes(new BufferedInputStream(fileinputStream), fsDataOutputStream, conf);

        // Close the streams
        fileinputStream.close();
        fsDataOutputStream.close();

        // Download a file using streams
        FileOutputStream fileOutputStream = new FileOutputStream("./test.txt");
        FSDataInputStream fsDataInputStream = fs.open(new Path("/root/data/student2.txt"));
        byte[] buffer2 = new byte[1024*1024];
        int read2 = 0;
        while ((read2 = fsDataInputStream.read(buffer2)) != -1){
            fileOutputStream.write(buffer2,0,read2);
        }
        // Alternatively, replace the loop above with IOUtils
        // (do not run both: the HDFS input stream is already exhausted after the loop):
        // IOUtils.copyBytes(fsDataInputStream, new BufferedOutputStream(fileOutputStream), conf);

        fileOutputStream.close();
        fsDataInputStream.close();

        // Clean up: delete the test directory (deleting it earlier would break the operations above)
        fs.delete(new Path("/root/data/"), true);

        fs.close();
    }
}


Error: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.

Exception in thread "main" java.lang.RuntimeException: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems
	at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:736)
	at org.apache.hadoop.util.Shell.getSetPermissionCommand(Shell.java:271)
	at org.apache.hadoop.util.Shell.getSetPermissionCommand(Shell.java:287)
	at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:978)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkOneDirWithMode(RawLocalFileSystem.java:660)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:700)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:699)
	at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:672)
	at org.apache.hadoop.fs.ChecksumFileSystem.mkdirs(ChecksumFileSystem.java:788)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:513)
	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:500)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1064)
	at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:417)
	at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:390)
	at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:340)
	at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2547)
	at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2516)
	at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2492)
	at com.hjt.yxh.hw.HdfsApiTest.main(HdfsApiTest.java:27)
Caused by: java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems
	at org.apache.hadoop.util.Shell.fileNotFoundException(Shell.java:548)
	at org.apache.hadoop.util.Shell.getHadoopHomeDir(Shell.java:569)
	at org.apache.hadoop.util.Shell.getQualifiedBin(Shell.java:592)
	at org.apache.hadoop.util.Shell.<clinit>(Shell.java:689)
	at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79)
	at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:3741)
	at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:3736)
	at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3520)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:540)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:288)
	at com.hjt.yxh.hw.HdfsApiTest.main(HdfsApiTest.java:14)
Caused by: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.
	at org.apache.hadoop.util.Shell.checkHadoopHomeInner(Shell.java:468)
	at org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:439)
	at org.apache.hadoop.util.Shell.<clinit>(Shell.java:516)
	... 7 more

Fixing the error

The message says that neither HADOOP_HOME nor hadoop.home.dir is set. Both point to a local Hadoop installation directory, so the client expects at least the Hadoop Windows binaries to be present on the local machine.

If you only connect remotely to a Hadoop cluster running on Linux, you do not need a full local Hadoop installation: download the winutils binaries for your Hadoop version, point the HADOOP_HOME environment variable at them, and copy hadoop.dll into C:/windows/system32.

See the referenced blog post for the detailed winutils installation and configuration steps; it covers them thoroughly, so they are not repeated here.
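
If you prefer not to change the system environment variables, the same location can be supplied from code before the first Hadoop class is loaded. A minimal sketch, assuming the winutils package was unpacked to D:\\hadoop-3.3.1 (a hypothetical path whose bin directory contains winutils.exe):

// Must run before any call that loads org.apache.hadoop.util.Shell (e.g. FileSystem.get)
// "D:\\hadoop-3.3.1" is only an example; replace it with your winutils directory
System.setProperty("hadoop.home.dir", "D:\\hadoop-3.3.1");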

Error: Permission denied: user=TianTian, access=WRITE, inode="/root":root:supergroup:drwxr-xr-x

Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=TianTian, access=WRITE, inode="/root":root:supergroup:drwxr-xr-x
	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:504)
	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:336)
	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermissionWithContext(FSPermissionChecker.java:360)
	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:240)
	at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1939)
	at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1923)
	at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1882)
	at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:60)
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3410)
	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1170)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:740)
	at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
	at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:600)
	at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:568)
	at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:552)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1093)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1035)
	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:963)
	at java.base/java.security.AccessController.doPrivileged(AccessController.java:691)
	at java.base/javax.security.auth.Subject.doAs(Subject.java:425)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2966)

	at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1573)
	at org.apache.hadoop.ipc.Client.call(Client.java:1519)
	at org.apache.hadoop.ipc.Client.call(Client.java:1416)
	at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:242)
	at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:129)
	at com.sun.proxy.$Proxy12.mkdirs(Unknown Source)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.mkdirs(ClientNamenodeProtocolTranslatorPB.java:674)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:64)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:564)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
	at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
	at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
	at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
	at com.sun.proxy.$Proxy13.mkdirs(Unknown Source)
	at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:2499)
	... 8 more

Fixing the error:

The client authenticates with the local Windows user name (TianTian here), which has no write permission on /root. Set HADOOP_USER_NAME to root (or another user with write access) before creating the FileSystem:

System.setProperty("HADOOP_USER_NAME","root");