文章目录
前言
可能很多小伙伴用java写过MapReduce的代码,但是对于操作HDFS相信很多小伙伴都是用Linux命令直接执行的,所以对于HDFS底层的读写可能并不是十分了解。用java写这些是为了帮助自己更加深入的了解HDFS的底层读写过程,希望可以帮助你更多的了解到HDFS的读写过程。
下面用测试窗口演示
Dependency
<dependencies>
<!-- Keep hadoop-common and hadoop-hdfs at the SAME version (here 2.6.0). -->
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
</dependencies>
一、初始化环境
- 这里采用创建FileSystem实例对象统一调用;
- 注意创建FileSystem时的用户设置(
设置不当可能部分测试无法完成
);
package org.example;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.*;
import java.net.URI;
/**
* Unit test for simple App.
*/
public class AppTest
{
    /** HDFS NameNode URI; adjust host/port to match your cluster. */
    public static String path = "hdfs://sole:9000";
    Configuration conf = null;
    FileSystem fs = null;
    FSDataInputStream fsdis = null;
    FSDataOutputStream fsdos = null;

    /**
     * Creates the shared {@link FileSystem} instance before each test.
     * Note the login user ("root"): some tests (e.g. the DataNode report)
     * fail with a permission error if the user is not privileged.
     */
    @Before
    public void before(){
        System.out.println("开始运行...");
        // Alternative way to set the login user; when set here, the user
        // argument to FileSystem.get() below can be omitted.
        // System.setProperty("HADOOP_USER_NAME", "root");
        conf = new Configuration();
        // Needed so that fs.append() works on this cluster.
        conf.setBoolean("dfs.support.append", true);
        // Avoid pipeline-recovery failures on small (1-2 DataNode) clusters.
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy","NEVER");
        conf.set("dfs.client.block.write.replace-datanode-on-failure.enable","true");
        try {
            // HDFS URI, configuration, login user.
            // The user may be omitted; it then defaults to the current OS user.
            fs = FileSystem.get(URI.create(path), conf, "root");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes any open streams and the FileSystem after each test.
     *
     * @throws IOException if closing a stream or the FileSystem fails
     */
    @After
    public void after() throws IOException {
        if (fsdis != null) {
            fsdis.close();
        }
        // BUG FIX: the original tested fsdis here a second time instead of
        // fsdos, so fsdos could be left open (or NPE if only fsdis was set).
        if (fsdos != null) {
            fsdos.close();
        }
        // Guard: fs stays null if FileSystem.get() failed in before().
        if (fs != null) {
            fs.close();
        }
        conf = null;
        System.out.println("运行结束!");
    }
}
二、测试方法
2.1 创建文件夹
- FsPermission :设置权限【u:useraction,g:groupaction,o:otheraction】
@Test
public void createDir() throws IOException {
    Path dir = new Path("/wsj/test");
    // Guard clause: nothing to do when the directory already exists.
    if (fs.exists(dir)) {
        System.out.println("Path: " + dir.toString() + " has exist");
        return;
    }
    // 00777 (octal) = rwxrwxrwx for user, group and others.
    FsPermission permission = new FsPermission((short) 00777);
    // Equivalent explicit form:
    // new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL, true);
    fs.mkdirs(dir, permission);
}
2.2 创建文件并写入数据
- 继承关系:FSDataOutputStream --> DataOutputStream --> FilterOutputStream --> OutputStream;
@Test
public void createFile() throws IOException {
    // BUG FIX: the original declared a LOCAL fsdos (shadowing the field),
    // so after() never closed it and the stream/lease leaked. Use
    // try-with-resources so the file is always closed.
    try (FSDataOutputStream out = fs.create(new Path("/wsj/firstFile.txt"))) {
        String line = "First write into hdfs!\n";
        out.write(line.getBytes());
        out.flush();
    }
}
2.3 在文件后追加内容
- append():文件末尾追加;
@Test
public void append() throws IOException {
    // Append to the end of an existing file; requires
    // dfs.support.append=true (set in before()).
    fsdos = fs.append(new Path("/wsj/firstFile.txt"));
    fsdos.write("this were appended to file\n".getBytes());
    fsdos.flush();
    fsdos.close();
}
2.4 查看HDFS文件
- IOUtils:是 Apache Commons IO 的一部分 ;类中的所有处理InputStream的方法都带有内部的缓冲区,所以我们不需要再使用
BufferedReader
或者BufferedInputStream
,默认的缓冲区大小为4K,不过我们也可以自定义它的大小;
@Test // cat an HDFS file to stdout
public void catFile() throws IOException {
    fsdis = fs.open(new Path("/wsj/firstFile.txt"));
    // fsdis: FSDataInputStream source; System.out: destination.
    // 4096: copy-buffer size. (The original used 20 bytes, which forces a
    //       copy round trip every 20 bytes for no benefit; output is the
    //       same, just far fewer iterations.)
    // true: close both streams when done (with false, close in finally).
    IOUtils.copyBytes(fsdis, System.out, 4096, true);
}
2.5 文件(夹)重命名
- rename()方法调用;
@Test
public void renameFile() throws IOException {
    // rename() reports failure via its boolean return (e.g. missing
    // source or existing destination) rather than throwing.
    Path src = new Path("/wsj/first.txt");
    Path dst = new Path("/wsj/firstFile.txt");
    boolean status = fs.rename(src, dst);
    System.out.println("rename ? :" + status);
}
2.6 删除文件(夹)
- 调用delete()方法
@Test
public void deleteDir() throws IOException {
    // FIX: the single-argument delete(Path) is deprecated in Hadoop 2.x;
    // use the explicit form. true = delete recursively (required for a
    // non-empty directory such as /wsj).
    fs.delete(new Path("/wsj"), true);
}
2.7 文件上传
2.7.1 本地文件上传
@Test
public void loadLocalFile() throws IOException {
    // Upload the local file e:/logs/log.log to /wsj/log.txt on HDFS.
    Path src = new Path("file:///e:/logs/log.log");
    Path dst = new Path("/wsj/log.txt");
    fs.copyFromLocalFile(src, dst);
}
2.7.2 上传带进度条
- new Progressable():重写方法;
@Test
public void copyWithProgress() throws Exception {
    // BUG FIX: the original never closed either stream (copyBytes was
    // called with close=false and there was no finally), leaking the local
    // file handle and the HDFS lease. try-with-resources closes both.
    try (InputStream is = new BufferedInputStream(new FileInputStream(
                 new File("E:\\ChromeDownload\\dbeaver-ce-6.0.0-win32.win32.x86_64.zip")));
         FSDataOutputStream out = fs.create(new Path("/wsj/dbeaver.win32.x86_64.zip"),
                 new Progressable() {
                     @Override
                     public void progress() {
                         // Invoked by the DFS client as data is acknowledged;
                         // prints a crude "=" progress bar.
                         System.out.print("=");
                     }
                 })) {
        IOUtils.copyBytes(is, out, 4096, false);
    }
}
2.8 下载文件
- 调用copyToLocalFile()方法;
@Test
public void copyToLocal() throws IOException {
    // Download /events/data/test.csv into the local e:/hadoop directory.
    fs.copyToLocalFile(new Path("/events/data/test.csv"), new Path("e:/hadoop"));
}
2.9 查看HDFS文件列表
@Test
public void showDirList() throws IOException {
    // List every entry directly under /events/data and print its
    // path, kind, replication factor and length.
    for (FileStatus status : fs.listStatus(new Path("/events/data"))) {
        String kind = status.isFile() ? "文件" : "文件夹";
        System.out.println(status.toString());
        System.out.println(status.getPath() + "\t" + kind + "\t"
                + status.getReplication() + "\t" + status.getLen());
    }
}
2.10 查看DataNode信息
- 查询DataNode信息时,系统将校验登录账户的信息,所以初始化时我将用户设置为了
root
@Test // dump DataNode information
public void showDataNodeInfo() throws IOException {
    // Needs an HDFS-only API, so downcast the generic FileSystem.
    // The datanode report is a privileged call: requires the "root"
    // login user configured in before().
    DistributedFileSystem dfs = (DistributedFileSystem) this.fs;
    for (DatanodeInfo node : dfs.getDataNodeStats()) {
        System.out.println("Name:" + node.getName());
        System.out.println("BlockPoolUsed:" + node.getBlockPoolUsed());
        System.out.println("CacheCapacity:" + node.getCacheCapacity());
        System.out.println("DatanodeReport:" + node.getDatanodeReport());
        System.out.println("DatanodeUuid:" + node.getDatanodeUuid());
        System.out.println("HostName:" + node.getHostName());
        System.out.println("InfoPort:" + node.getInfoPort());
    }
    dfs.close();
}
PS:如果有写错或者写的不好的地方,欢迎各位大佬在评论区留下宝贵的意见或者建议,敬上!如果这篇博客对您有帮助,希望您可以顺手帮我点个赞!不胜感谢!
原创作者:wsjslient |