一、FileSystem
org.apache.hadoop.fs.FileSystem是hadoop的抽象文件系统,为不同的数据访问提供了统一的接口,并提供了大量具体文件系统的实现,满足hadoop上各种数据访问需求
如以下几个具体实现(原表格见《hadoop权威指南》):
[图片缺失（外链图片未能转存）：Hadoop 具体文件系统实现一览表，原表格见《hadoop权威指南》]
二、Java接口
文件系统的方法分为两类:一部分处理文件和目录;一部分读写文件数据。
hadoop抽象文件系统的文件操作与java、linux的对应关系(原表格见《Hadoop技术内幕 深入解析HADOOP COMMON和HDFS架构设计与实现原理》):
[图片缺失（外链图片未能转存）：Hadoop 抽象文件系统操作与 Java、Linux 的对应关系表，原表格见《Hadoop技术内幕》]
1.读取数据
URL方式
/**
 * Reads an HDFS file through the java.net.URL API and copies it to stdout.
 *
 * Bug fixed: {@code main()} took no parameters, so it was not a valid JVM
 * entry point; the standard {@code String[] args} signature is required.
 *
 * NOTE(review): for "hdfs://" URLs to be parseable at all, Hadoop's URL
 * stream handler factory must have been registered once per JVM (see the
 * limitation discussed below this snippet) — confirm the surrounding
 * application performs that registration.
 */
public static void main(String[] args) throws Exception {
    String uri = "hdfs://localhost:9000/input/input1.txt";
    InputStream in = null;
    try {
        in = new URL(uri).openStream();
        // 4096-byte buffer; 'false' keeps System.out open after copying.
        IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(in); // null-safe close of the input stream
    }
}
这种文件读取的方法具有一定的限制性。因为 java.net.URL 的 setURLStreamHandlerFactory 方法在每个 Java 虚拟机中最多只能调用一次，如果程序中有不受自己控制的第三方组件已经调用了这个方法，就无法再注册 Hadoop 的处理器，也就无法使用这种方法从 hadoop 中读取数据。
FileSystem方式
/**
 * Streams an HDFS file to stdout via the FileSystem API. Unlike the URL
 * approach, this does not depend on the JVM-wide URL stream handler factory.
 */
public static void main(String[] args) throws Exception {
    String uri = "hdfs://localhost:9000/input/input1.txt";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    // Alternative: FileSystem.newInstance(URI.create(uri), conf) returns a
    // fresh, non-cached instance instead of the shared cached one.
    InputStream stream = null;
    try {
        stream = fs.open(new Path(uri));
        // Copy with a 4096-byte buffer; do not close System.out afterwards.
        IOUtils.copyBytes(stream, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(stream);
    }
}
2.新建文件夹
/**
 * Creates the given HDFS directory (including missing parents) if it does
 * not already exist, then closes the filesystem handle.
 *
 * Bug fixed: the original called fs.close() only on the success path, so the
 * handle leaked whenever exists()/mkdirs() threw. try-with-resources now
 * guarantees the close.
 *
 * @param folder path of the directory to create
 * @throws IOException if the filesystem cannot be reached or mkdirs fails
 */
public static void mkdirs(String folder) throws IOException {
    Path path = new Path(folder);
    try (FileSystem fs = FileSystem.get(URI.create(HDFS), conf)) {
        if (!fs.exists(path)) {
            fs.mkdirs(path);
            System.out.println("Create: " + folder);
        }
    }
}
3.新建文件
/**
 * Creates (or overwrites) an HDFS file and writes the given string content
 * into it.
 *
 * Bug fixed: the original placed fs.close() after the try/finally, so the
 * FileSystem handle leaked whenever create()/write() threw. Both resources
 * are now managed by try-with-resources.
 *
 * NOTE(review): content.getBytes() uses the platform default charset;
 * consider passing an explicit charset (e.g. UTF-8) for portable output.
 *
 * @param file    HDFS path of the file to create
 * @param content text to write into the file
 * @throws IOException on filesystem or write failure
 */
public static void createFile(String file, String content) throws IOException {
    byte[] buff = content.getBytes();
    try (FileSystem fs = FileSystem.get(URI.create(HDFS), conf);
         FSDataOutputStream os = fs.create(new Path(file))) {
        os.write(buff, 0, buff.length);
        System.out.println("Create: " + file);
    }
}
/**
 * Copies a local file to HDFS, printing a "." for each progress callback
 * and ".." when the copy finishes.
 *
 * Bugs fixed: the original opened the local InputStream before obtaining the
 * FileSystem, leaking it if FileSystem.get()/create() threw; it also closed
 * both streams twice (copyBytes with close=true AND closeStream in finally).
 * try-with-resources now owns both streams and each is closed exactly once.
 */
public void createTest() throws Exception {
    String localSrc = "D:/merge.txt";
    String dst = "hdfs://master:9000/input/merge.txt";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(dst), conf);
    try (InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
         OutputStream out = fs.create(new Path(dst), new Progressable() {
             public void progress() {
                 System.out.println(".");
             }
         })) {
        // close=false: the try-with-resources block closes both streams.
        IOUtils.copyBytes(in, out, 4096, false);
        System.out.println("..");
    }
}
4.删除文件夹/文件
/**
 * Recursively deletes a file or directory from HDFS (the "rmr" of the
 * shell).
 *
 * Bug fixed: deleteOnExit() only *schedules* deletion for when the
 * FileSystem is closed and silently ignores failures; delete(path, true)
 * performs an immediate recursive delete. The handle is also now closed
 * even when an exception is thrown.
 *
 * @param folder HDFS path to remove
 * @throws IOException on filesystem failure
 */
public static void rmr(String folder) throws IOException {
    Path path = new Path(folder);
    try (FileSystem fs = FileSystem.get(URI.create(HDFS), conf)) {
        fs.delete(path, true); // true = recursive
        System.out.println("Delete: " + folder);
    }
}
5.列出路径下文件信息
/**
 * Lists the immediate children of an HDFS path, printing each entry's path,
 * directory flag, and length.
 *
 * Bug fixed: the original leaked the FileSystem handle whenever
 * listStatus() threw (e.g. FileNotFoundException); try-with-resources now
 * guarantees the close.
 *
 * @param folder HDFS path to list
 * @throws IOException if the path cannot be listed
 */
public static void ls(String folder) throws IOException {
    Path path = new Path(folder);
    try (FileSystem fs = FileSystem.get(URI.create(HDFS), conf)) {
        FileStatus[] list = fs.listStatus(path);
        System.out.println("ls: " + folder);
        System.out.println("==========================================================");
        for (FileStatus f : list) {
            System.out.printf("name: %s, folder: %s, size: %d\n", f.getPath(), f.isDirectory(), f.getLen());
        }
        System.out.println("==========================================================");
    }
}
6.复制本地文件到hdfs
/**
 * Uploads a local file to HDFS.
 *
 * Bug fixed: the original leaked the FileSystem handle when
 * copyFromLocalFile() threw; try-with-resources guarantees the close.
 *
 * @param local  source path on the local filesystem
 * @param remote destination path on HDFS
 * @throws IOException on copy failure
 */
public static void copyFile(String local, String remote) throws IOException {
    try (FileSystem fs = FileSystem.get(URI.create(HDFS), conf)) {
        fs.copyFromLocalFile(new Path(local), new Path(remote));
        System.out.println("copy from: " + local + " to " + remote);
    }
}
7.从hdfs下载文件到本地
/**
 * Downloads a file from HDFS to the local filesystem.
 *
 * Bugs fixed: the status message was missing a space ("download: fromX"
 * instead of "download: from X"), and the FileSystem handle leaked when
 * copyToLocalFile() threw.
 *
 * @param remote source path on HDFS
 * @param local  destination path on the local filesystem
 * @throws IOException on copy failure
 */
public static void download(String remote, String local) throws IOException {
    Path path = new Path(remote);
    try (FileSystem fs = FileSystem.get(URI.create(HDFS), conf)) {
        fs.copyToLocalFile(path, new Path(local));
        System.out.println("download: from " + remote + " to " + local);
    }
}
8.重命名文件
/**
 * Renames (moves) an HDFS path.
 *
 * Bugs fixed: FileSystem.rename() reports failure via its boolean return
 * value, not an exception — the original ignored it and always printed a
 * success message; the FileSystem handle also leaked on exception.
 *
 * @param src existing HDFS path
 * @param dst new HDFS path
 * @throws IOException on filesystem failure
 */
public static void rename(String src, String dst) throws IOException {
    Path name1 = new Path(src);
    Path name2 = new Path(dst);
    try (FileSystem fs = FileSystem.get(URI.create(HDFS), conf)) {
        if (fs.rename(name1, name2)) {
            System.out.println("Rename: from " + src + " to " + dst);
        } else {
            System.out.println("Rename failed: from " + src + " to " + dst);
        }
    }
}
9.追加文件内容
/**
 * Appends the contents of a local file to an existing HDFS file, printing
 * "." on progress callbacks and ".." when done.
 *
 * Bugs fixed: the original opened the local InputStream before obtaining the
 * FileSystem, leaking it if FileSystem.get()/append() threw; it also closed
 * both streams twice (copyBytes with close=true AND closeStream in finally).
 * try-with-resources now owns both streams.
 */
public void appendTest() throws Exception {
    String localSrc = "D:/merge.txt";
    String dst = "hdfs://master:9000/input/merge.txt";
    Configuration conf = new Configuration();
    // Enable append support and relax datanode-replacement on failure so
    // append works on small/dev clusters with few datanodes.
    conf.setBoolean("dfs.support.append", true);
    conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
    conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
    FileSystem fs = FileSystem.get(URI.create(dst), conf);
    try (InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
         OutputStream out = fs.append(new Path(dst), 4096, new Progressable() {
             public void progress() {
                 System.out.println(".");
             }
         })) {
        // close=false: the try-with-resources block closes both streams.
        IOUtils.copyBytes(in, out, 4096, false);
        System.out.println("..");
    }
}
10.合并多个文件上传
/**
 * Merges every file under a local directory into a single HDFS file,
 * printing each source file name as it is consumed.
 *
 * Bug fixed: the original closed the output stream (and each input stream)
 * only on the success path, leaking them whenever an IOException occurred
 * mid-copy; try-with-resources now guarantees every stream is closed.
 */
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);            // destination FS (from config)
    FileSystem local = FileSystem.getLocal(conf);      // source: local filesystem
    Path inputDir = new Path("D:/Test");
    Path hdfsFile = new Path("hdfs://master:9000/input/test.txt");
    try (FSDataOutputStream out = hdfs.create(hdfsFile)) {
        for (FileStatus status : local.listStatus(inputDir)) {
            System.out.println(status.getPath().getName());
            try (FSDataInputStream in = local.open(status.getPath())) {
                byte[] buffer = new byte[256];
                int bytesRead;
                while ((bytesRead = in.read(buffer)) > 0) {
                    out.write(buffer, 0, bytesRead);
                }
            }
        }
    } catch (IOException e) {
        // Best-effort tool behavior preserved: report and return.
        e.printStackTrace();
    }
}