hdfs 文件系统操作
命令行操作
文章链接 https://segmentfault.com/a/1190000002672666
补充:直接使用 hdfs 相关命令需要提前配置好 hadoop 环境变量
hadoop 命令前缀可以简化为 hdfs
fs 命令前缀可以写为 dfs,例如 hadoop fs -ls / 可以写成 hdfs dfs -ls /
API 操作
HDFS 的 API 操作主要涉及三个核心类:Configuration(hdfs 连接配置类)、FileSystem(hdfs 文件系统类)
以及 IOUtils(流操作工具类)。
示例代码
package com.stack.file;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;
/**
* create by stack on 2020/9/8
* 操作 hdfs 文件系统时,只能使用 hdfs 文件系统中已存在的用户,或者目标文件夹的权限对任意用户开放,否则会产生安全权限问题
* @author stack
*/
public class TestFile {
/**
 * Obtains a file system handle from a configuration whose settings are applied in code,
 * and prints that handle.
 *
 * @throws IOException if the file system cannot be obtained or closed
 */
@Test
public void test01() throws IOException {
    // Build the HDFS configuration object and set the connection parameters in code.
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://localhost:9000");
    configuration.set("dfs.replication", "1");
    // try-with-resources releases the handle on every path; the original never closed it.
    try (FileSystem fileSystem = FileSystem.get(configuration)) {
        // Print the file system handle.
        System.out.println(fileSystem);
    }
}
/**
 * Obtains a file system handle from an unmodified {@code Configuration} and prints it.
 *
 * <p>No settings are applied in code here, so the result depends entirely on what the
 * configuration loads by default (presumably config files on the classpath; confirm).
 *
 * @throws IOException if the file system cannot be obtained or closed
 */
@Test
public void test02() throws IOException {
    Configuration configuration = new Configuration();
    // try-with-resources releases the handle on every path; the original never closed it.
    try (FileSystem fileSystem = FileSystem.get(configuration)) {
        System.out.println(fileSystem);
    }
}
/**
 * Copies a local file to {@code /hello4.txt} on the file system at
 * {@code hdfs://localhost:9000}, without naming an explicit user.
 *
 * @throws IOException if connecting, copying, or closing fails
 */
@Test
public void test03() throws IOException {
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://localhost:9000");
    // try-with-resources closes the handle even when the copy throws.
    try (FileSystem fileSystem = FileSystem.get(configuration)) {
        Path localSource = new Path("/Volumes/数据盘1/ideaproject/hdfsmodule/hdfstest/dir/hello.txt");
        Path remoteTarget = new Path("/hello4.txt");
        fileSystem.copyFromLocalFile(localSource, remoteTarget);
    }
}
/**
 * Copies a local file to {@code /hello1.txt} while connecting as the user {@code stack}.
 *
 * @throws URISyntaxException if the HDFS URI is malformed
 * @throws IOException if connecting, copying, or closing fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
@Test
public void test04() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    configuration.set("dfs.replication", "1");
    URI uri = new URI("hdfs://localhost:9000");
    // Original author's note: when no HDFS user is given, the local login user name is used;
    // relative paths are stored under that user's directory, e.g. /user/stack.
    // The user name previously carried a trailing space ("stack "), which names a different
    // user than the "stack" account used by every other test in this class.
    try (FileSystem fileSystem = FileSystem.get(uri, configuration, "stack")) {
        Path localSource = new Path("/Volumes/数据盘1/ideaproject/hdfsmodule/hdfstest/dir/hello.txt");
        Path remoteTarget = new Path("/hello1.txt");
        fileSystem.copyFromLocalFile(localSource, remoteTarget);
    }
}
/**
 * Creates the directory {@code /user/hadoop} as user {@code stack} and prints whether
 * {@code mkdirs} reported success.
 *
 * @throws URISyntaxException if the HDFS URI is malformed
 * @throws IOException if connecting, creating the directory, or closing fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
@Test
public void test05() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    configuration.set("dfs.replication", "1");
    URI uri = new URI("hdfs://localhost:9000");
    // Original author's note: a user that does not exist causes a security error.
    // try-with-resources closes the handle even when mkdirs throws.
    try (FileSystem fileSystem = FileSystem.get(uri, configuration, "stack")) {
        boolean created = fileSystem.mkdirs(new Path("/user/hadoop"));
        System.out.println(created ? "创建成功" : "创建失败");
    }
}
/**
 * Deletes {@code /user/hadoop} as user {@code stack} and prints whether {@code delete}
 * reported success.
 *
 * @throws URISyntaxException if the HDFS URI is malformed
 * @throws IOException if connecting, deleting, or closing fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
@Test
public void test06() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the handle even when delete throws.
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://localhost:9000"), configuration, "stack")) {
        // Comparable to rm; the second argument requests a recursive delete.
        boolean deleted = fileSystem.delete(new Path("/user/hadoop"), true);
        System.out.println(deleted ? "success" : "fail");
    }
}
/**
 * Renames {@code /hello.txt} to {@code hello1.txt} as user {@code stack} and prints
 * whether {@code rename} reported success.
 *
 * @throws URISyntaxException if the HDFS URI is malformed
 * @throws IOException if connecting, renaming, or closing fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
@Test
public void test07() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources releases the handle even when rename throws.
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://localhost:9000"), configuration, "stack")) {
        // NOTE(review): the destination has no leading slash, so it is a relative path and
        // presumably resolves against the user's working directory, not "/". Confirm intent.
        boolean renamed = fileSystem.rename(new Path("/hello.txt"), new Path("hello1.txt"));
        System.out.println(renamed ? "success" : "fail");
    }
}
/**
 * Recursively lists the files under {@code /} and prints, for each one, its name, block
 * size, access time, and the offset and hosts of every block location.
 *
 * @throws URISyntaxException if the HDFS URI is malformed
 * @throws IOException if connecting, listing, or closing fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
@Test
public void test08() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the handle even when listing or iteration throws.
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://localhost:9000"), configuration, "stack")) {
        // The second argument requests a recursive listing.
        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);
        while (files.hasNext()) {
            LocatedFileStatus status = files.next();
            System.out.println("name : " + status.getPath().getName());
            System.out.println("blockSize : " + status.getBlockSize());
            System.out.println("accessTime : " + status.getAccessTime());
            for (BlockLocation location : status.getBlockLocations()) {
                System.out.println("block-offset:" + location.getOffset());
                for (String host : location.getHosts()) {
                    System.out.println("host -> " + host);
                }
            }
            System.out.println("-----------------------------------");
        }
    }
}
/**
 * Lists the direct children of {@code /} and prints, for each entry, whether it is a
 * file or a directory.
 *
 * @throws URISyntaxException if the HDFS URI is malformed
 * @throws IOException if connecting, listing, or closing fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
@Test
public void test9() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // try-with-resources closes the handle even when listStatus throws.
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://localhost:9000"), configuration, "stack")) {
        for (FileStatus entry : fileSystem.listStatus(new Path("/"))) {
            // Anything that is not reported as a file is printed as a directory.
            String kind = entry.isFile() ? "是文件" : "是文件夹";
            System.out.println(entry.getPath() + kind);
        }
    }
}
/**
 * Uploads a local file to {@code /user/stack/hello3.txt} by copying bytes between streams,
 * the manual counterpart of {@code copyFromLocalFile}.
 *
 * <p>Failures now propagate so the test fails; previously the exception message was
 * printed and the test still passed.
 *
 * @throws URISyntaxException if the HDFS URI is malformed
 * @throws IOException if connecting, opening either stream, copying, or closing fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
@Test
public void test10() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // Resources close in reverse declaration order: output stream, local input, file system.
    // The original closed the file system first, before the stream that writes through it.
    // The local file is opened before the remote file is created, so a missing local file
    // does not leave an empty remote file behind.
    try (FileSystem fileSystem =
                FileSystem.get(new URI("hdfs://localhost:9000"), configuration, "stack");
            FileInputStream fileInputStream = new FileInputStream(
                new File("/Volumes/数据盘1/ideaproject/hdfsmodule/hdfstest/dir/hello1.txt"));
            FSDataOutputStream fsDataOutputStream =
                fileSystem.create(new Path("/user/stack/hello3.txt"))) {
        // close=false: stream lifetime is owned by the try-with-resources statement.
        IOUtils.copyBytes(fileInputStream, fsDataOutputStream, 4096, false);
    }
}
/**
 * Reads the remote file {@code /hello4.txt} and writes its bytes to standard output.
 *
 * <p>Failures now propagate so the test fails; previously the exception message was
 * printed and the test still passed.
 *
 * @throws URISyntaxException if the HDFS URI is malformed
 * @throws IOException if connecting, opening, copying, or closing fails
 * @throws InterruptedException if obtaining the file system is interrupted
 */
@Test
public void test11() throws URISyntaxException, IOException, InterruptedException {
    Configuration configuration = new Configuration();
    // The file system is now closed even when open() throws; the input stream is closed first.
    try (FileSystem fileSystem =
                FileSystem.get(new URI("hdfs://localhost:9000"), configuration, "stack");
            FSDataInputStream open = fileSystem.open(new Path("/hello4.txt"))) {
        // close=false keeps System.out open for the rest of the JVM.
        IOUtils.copyBytes(open, System.out, 4096, false);
    }
}
/**
* 文件分块读取与下载
*/
/**
 * Writes the literal text {@code hello} to {@code /hello4.txt} on HDFS and calls
 * {@code hflush()} before closing the stream.
 *
 * <p>Per the original author's note, this overwrites any existing data in the file.
 *
 * @throws Exception if the URI is malformed, or connecting, writing, flushing, or closing fails
 */
@Test
public void test14() throws Exception {
    // 1. Create the configuration object.
    Configuration configuration = new Configuration();
    // The original never closed fs and leaked fos if write or hflush threw.
    try (FileSystem fs =
                FileSystem.get(new URI("hdfs://localhost:9000"), configuration, "stack");
            // 2. Create the file output stream.
            FSDataOutputStream fos = fs.create(new Path("/hello4.txt"))) {
        // 3. Write the data with an explicit charset instead of the platform default.
        fos.write("hello".getBytes(StandardCharsets.UTF_8));
        // 4. Flush (the original author calls this a "consistency flush").
        fos.hflush();
    }
}
}