HDFS 设计的主要目的是对海量数据进行存储,也就是说在其上能够存储很大量的文件。
HDFS 将这些文件分割成数据块之后,存储在不同的 DataNode 上。HDFS 提供了通过 Java API 对 HDFS 里面的文件进行操作的功能;数据块在 DataNode 上的具体存放位置,对于开发者来说是透明的。
使用 Java API 可以完成对 HDFS 的各种操作,如新建文件、删除文件、读取文件内容等。下面将介绍 HDFS 常用的 Java API 及其编程实例。
对 HDFS 中的文件操作主要涉及以下几个类。
写了一个增删改查的例子,如下,有问题欢迎大家指出:
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;

import org.apache.commons.compress.utils.IOUtils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient.Conf;
/**
 * Demonstrates basic HDFS operations (create, delete, upload, mkdir,
 * list, download) through the Hadoop {@link FileSystem} Java API.
 *
 * <p>All methods connect to the same demo NameNode and close every
 * FileSystem handle and stream via try-with-resources, so no
 * connections or file descriptors leak even when an operation fails.
 */
public class HDFSDemo03 {

    /** Address of the demo cluster's NameNode, shared by all operations. */
    private static final String DEFAULT_FS = "hdfs://itcast01:8000";

    /** Builds a Configuration whose default filesystem points at the demo cluster. */
    private static Configuration newConf() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", DEFAULT_FS);
        return conf;
    }

    /**
     * Creates {@code /az0326/test.txt} on HDFS and writes a short greeting into it.
     *
     * @throws IOException if the NameNode is unreachable or the write fails
     */
    public static void createFile() throws IOException {
        // Explicit UTF-8: getBytes() without a charset depends on the JVM's
        // platform default and can vary between machines.
        byte[] fileContent = "hello big data!\n".getBytes(StandardCharsets.UTF_8);
        Path target = new Path("/az0326/test.txt");
        // try-with-resources closes (and thereby flushes) the stream and the
        // FileSystem handle; the original never closed either, so the write
        // could be lost and the connection leaked.
        try (FileSystem fs = FileSystem.get(URI.create(DEFAULT_FS), newConf());
             FSDataOutputStream outputStream = fs.create(target)) {
            outputStream.write(fileContent);
        }
    }

    /**
     * Recursively deletes {@code /jdk} from HDFS and prints the outcome.
     *
     * @throws IOException if the NameNode is unreachable
     */
    public static void deleteFile() throws IOException {
        try (FileSystem fs = FileSystem.get(newConf())) {
            // 'true' = recursive delete (required for non-empty directories).
            boolean delResult = fs.delete(new Path("/jdk"), true);
            System.out.println(delResult ? "删除成功" : "删除失败");
        }
    }

    /**
     * Uploads a local file to the {@code /az0326} directory on HDFS.
     *
     * @throws IOException if the local file is missing or the upload fails
     */
    public static void copyFileToHdfs() throws IOException {
        Path src = new Path("D:\\项目\\Desktop.zip");
        Path dst = new Path("/az0326");
        try (FileSystem fs = FileSystem.get(newConf())) {
            fs.copyFromLocalFile(src, dst);
        }
    }

    /**
     * Creates the {@code /azTest} directory (including any missing parents) on HDFS.
     *
     * @throws IOException if the NameNode is unreachable
     */
    public static void MkDirs() throws IOException {
        try (FileSystem fs = FileSystem.get(newConf())) {
            fs.mkdirs(new Path("/azTest"));
        }
    }

    /**
     * Recursively deletes the {@code /azTest} directory and prints the outcome.
     *
     * @throws IOException if the NameNode is unreachable
     */
    public static void DelMkdirs() throws IOException {
        try (FileSystem fs = FileSystem.get(newConf())) {
            boolean res = fs.delete(new Path("/azTest"), true);
            System.out.println(res ? "success" : "fail");
        }
    }

    /**
     * Lists the direct children of {@code /hbase}; for each file it prints its
     * replication factor, block size, and the location of every block.
     *
     * @throws IOException if the NameNode is unreachable
     */
    public static void ListDirectory() throws IOException {
        try (FileSystem fs = FileSystem.get(newConf())) {
            FileStatus[] fsta = fs.listStatus(new Path("/hbase"));
            for (FileStatus status : fsta) {
                if (status.isFile()) {
                    System.out.println("文件路径:" + status.getPath().toString());
                    System.out.println("文件路径 getReplication:" + status.getReplication());
                    System.out.println("文件路径 getBlockSize:" + status.getBlockSize());
                    // Range [0, getLen()) covers the whole file; the original
                    // passed getBlockSize(), which only reported the first block
                    // of any file larger than one block.
                    BlockLocation[] blocklocations =
                            fs.getFileBlockLocations(status, 0, status.getLen());
                    for (BlockLocation location : blocklocations) {
                        System.out.println("主机名:" + location.getHosts()[0]);
                        // getNames() returns one "host:port" per replica; the
                        // original printed the array reference itself, which
                        // shows up as unreadable "[Ljava.lang.String;@..." text.
                        for (String name : location.getNames()) {
                            System.out.println("主机名:" + name);
                        }
                    }
                } else {
                    System.out.println("directory:" + status.getPath().toString());
                }
            }
        }
    }

    /**
     * Downloads {@code /az0326/test.txt} from HDFS to a local file.
     *
     * @throws IOException if the HDFS file is missing or the copy fails
     */
    public static void DownLoad() throws IOException {
        // Both streams and the FileSystem are closed on every path; the
        // original leaked all three.
        try (FileSystem fs = FileSystem.get(newConf());
             FSDataInputStream inputStream = fs.open(new Path("/az0326/test.txt"));
             FileOutputStream outputStream =
                     new FileOutputStream(new File("D:\\英雄时刻\\data.txt"))) {
            IOUtils.copy(inputStream, outputStream);
        }
        System.out.println("下载成功!");
    }

    /** Entry point: uncomment the operation you want to run against the demo cluster. */
    public static void main(String[] args) throws IOException {
        //createFile();
        //deleteFile();
        //copyFileToHdfs();
        //MkDirs();
        //DelMkdirs();
        //ListDirectory();
        DownLoad();
    }
}
思想(每个操作都遵循同样的五步):
一、确定 HDFS 的访问地址(URI,即 NameNode 的地址)
二、创建 Configuration 配置对象,并通过 fs.defaultFS 指定该地址
三、用 Configuration 获取分布式文件系统 FileSystem 对象
四、用 Path 给定要操作的文件或目录路径
五、调用 FileSystem 对象的相应方法完成操作,最后关闭 FileSystem