Hadoop文件系统及其java接口

一、FileSystem

org.apache.hadoop.fs.FileSystem是hadoop的抽象文件系统,为不同的数据访问提供了统一的接口,并提供了大量具体文件系统的实现,满足hadoop上各种数据访问需求
如以下几个具体实现(原表格见《hadoop权威指南》):
[外链图片转存中…(img-oiwsBODl-1576743176529)]
这里写图片描述

二、Java接口

文件系统的方法分为两类:一部分处理文件和目录;一部分读写文件数据。
hadoop抽象文件系统的文件操作与java、linux的对应关系(原表格见《Hadoop技术内幕 深入解析HADOOP COMMON和HDFS架构设计与实现原理》):
[外链图片转存中…(img-bQ0kAotI-1576743176530)]
这里写图片描述

1.读取数据

URL方式

/**
 * Reads an HDFS file via {@code java.net.URL} and streams its contents to stdout.
 *
 * NOTE(review): for URL to understand the "hdfs://" scheme, the JVM must first be
 * configured (at most once per JVM) with
 * {@code URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory())},
 * typically in a static initializer of the enclosing class — confirm this exists
 * elsewhere in the class, otherwise openStream() will fail on the hdfs scheme.
 */
public static void main(String[] args) throws Exception { // was main() — not a valid JVM entry point
      String uri = "hdfs://localhost:9000/input/input1.txt";
      InputStream in = null;
      try{
          in = new URL(uri).openStream();
          // false: do not let copyBytes close the streams; the finally block does it.
          IOUtils.copyBytes(in, System.out, 4096, false);
      }finally{
          IOUtils.closeStream(in);
      }
  }

这种文件读取的方法具有一定的限制性。因为java.net.URL的setURLStreamHandlerFactory方法在每个Java虚拟机中最多只能调用一次,如果程序中有不受自己控制的第三方组件调用了这个方法,将无法使用这种方法从hadoop中读取数据。

FileSystem方式

/**
 * Reads an HDFS file through the FileSystem API and streams it to stdout.
 * This is the preferred approach: unlike the URL method it does not require the
 * process-wide URLStreamHandlerFactory.
 */
public static void main(String[] args) throws Exception{
        String uri = "hdfs://localhost:9000/input/input1.txt";
        Configuration conf = new Configuration();
        // FileSystem.get may return a cached instance shared within the JVM;
        // FileSystem.newInstance(URI, conf) always creates a private instance.
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        InputStream in = null;
        try{
            in = fs.open(new Path(uri));
            // false: streams are closed explicitly in the finally block.
            IOUtils.copyBytes(in, System.out, 4096, false);
        }finally{
            IOUtils.closeStream(in);
            // Original leaked the FileSystem handle; safe to close here because
            // this standalone program is the only user of the (possibly cached) instance.
            fs.close();
        }
    }

2.新建文件夹

/**
 * Creates the given directory (and any missing parents) on HDFS if it does not exist.
 *
 * @param folder path of the directory to create
 * @throws IOException if the filesystem cannot be reached or creation fails
 */
public static void mkdirs(String folder) throws IOException {
        Path path = new Path(folder);
        FileSystem fs = FileSystem.get(URI.create(HDFS), conf);
        try {
            if (!fs.exists(path)) {
                // mkdirs returns false on failure without throwing — check it
                // instead of unconditionally reporting success.
                if (fs.mkdirs(path)) {
                    System.out.println("Create: " + folder);
                } else {
                    System.out.println("Create failed: " + folder);
                }
            }
        } finally {
            // Original skipped close() when exists()/mkdirs() threw.
            fs.close();
        }
    }

3.新建文件

/**
 * Creates (or overwrites) a file on HDFS and writes the given text content to it.
 *
 * @param file    destination path on HDFS
 * @param content text to write (encoded as UTF-8)
 * @throws IOException if the filesystem cannot be reached or the write fails
 */
public static void createFile(String file, String content) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(HDFS), conf);
        // Explicit charset: the no-arg getBytes() depends on the platform default,
        // producing different bytes on different machines.
        byte[] buff = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        try {
            // try-with-resources closes the stream even if write() throws.
            try (FSDataOutputStream os = fs.create(new Path(file))) {
                os.write(buff, 0, buff.length);
                System.out.println("Create: " + file);
            }
        } finally {
            // Original only closed fs on the success path.
            fs.close();
        }
    }

/**
 * Uploads a local file to HDFS, printing a "." for each progress callback.
 * The Progressable is invoked by the client as data is written to the pipeline.
 */
public void createTest() throws Exception {
		String localSrc = "D:/merge.txt";
		String dst = "hdfs://master:9000/input/merge.txt";

		InputStream in = new BufferedInputStream(new FileInputStream(localSrc));

		Configuration conf = new Configuration();

		FileSystem fs = FileSystem.get(URI.create(dst), conf);

		OutputStream out = null;

		try {
			out = fs.create(new Path(dst), new Progressable(){
				public void progress(){
					System.out.println(".");
				}
			});
			// false: the original passed true (copyBytes closes both streams) and
			// then closed them again in finally — a redundant double close. Close
			// exactly once, in the finally block.
			IOUtils.copyBytes(in, out, 4096, false);
			System.out.println("..");
		} finally {
			IOUtils.closeStream(in);
			IOUtils.closeStream(out);
		}
	}

4.删除文件夹/文件

/**
 * Recursively deletes a file or directory from HDFS immediately.
 *
 * @param folder path to delete
 * @throws IOException if the filesystem cannot be reached
 */
public static void rmr(String folder) throws IOException {
        Path path = new Path(folder);
        FileSystem fs = FileSystem.get(URI.create(HDFS), conf);
        try {
            // BUG FIX: deleteOnExit() does not delete now — it only registers the
            // path for deletion when the FileSystem is closed. delete(path, true)
            // performs an immediate recursive delete and reports the outcome.
            if (fs.delete(path, true)) {
                System.out.println("Delete: " + folder);
            } else {
                System.out.println("Delete failed: " + folder);
            }
        } finally {
            fs.close();
        }
    }

5.列出路径下文件信息

/**
 * Prints a listing of the entries directly under the given HDFS path:
 * full path, whether it is a directory, and its length in bytes.
 *
 * @param folder path whose children are listed
 * @throws IOException if the filesystem cannot be reached
 */
public static void ls(String folder) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(HDFS), conf);
        FileStatus[] entries = fs.listStatus(new Path(folder));
        String rule = "==========================================================";
        System.out.println("ls: " + folder);
        System.out.println(rule);
        for (int i = 0; i < entries.length; i++) {
            FileStatus entry = entries[i];
            System.out.printf("name: %s, folder: %s, size: %d\n",
                    entry.getPath(), entry.isDirectory(), entry.getLen());
        }
        System.out.println(rule);
        fs.close();
    }

6.复制本地文件到hdfs

/**
 * Copies a file from the local filesystem to HDFS.
 *
 * @param local  source path on the local filesystem
 * @param remote destination path on HDFS
 * @throws IOException if the copy fails
 */
public static void copyFile(String local, String remote) throws IOException {
        FileSystem fs = FileSystem.get(URI.create(HDFS), conf);
        Path src = new Path(local);
        Path dst = new Path(remote);
        fs.copyFromLocalFile(src, dst);
        String message = "copy from: " + local + " to " + remote;
        System.out.println(message);
        fs.close();
    }

7.从hdfs下载文件到本地

/**
 * Downloads a file from HDFS to the local filesystem.
 *
 * @param remote source path on HDFS
 * @param local  destination path on the local filesystem
 * @throws IOException if the copy fails
 */
public static void download(String remote, String local) throws IOException {
        Path path = new Path(remote);
        FileSystem fs = FileSystem.get(URI.create(HDFS), conf);
        try {
            fs.copyToLocalFile(path, new Path(local));
            // BUG FIX: original message read "download: fromX to Y" (missing space).
            System.out.println("download: from " + remote + " to " + local);
        } finally {
            // Ensure the handle is released even when the copy throws.
            fs.close();
        }
    }

8.重命名文件

/**
 * Renames (moves) a file or directory on HDFS.
 *
 * @param src existing path
 * @param dst new path
 * @throws IOException if the filesystem cannot be reached
 */
public static void rename(String src, String dst) throws IOException {
        Path name1 = new Path(src);
        Path name2 = new Path(dst);
        FileSystem fs = FileSystem.get(URI.create(HDFS), conf);
        try {
            // BUG FIX: rename() signals failure by returning false, not by
            // throwing — the original printed success unconditionally.
            if (fs.rename(name1, name2)) {
                System.out.println("Rename: from " + src + " to " + dst);
            } else {
                System.out.println("Rename failed: from " + src + " to " + dst);
            }
        } finally {
            fs.close();
        }
    }

9.追加文件内容

/**
 * Appends the contents of a local file to an existing HDFS file, printing a "."
 * per progress callback.
 *
 * NOTE(review): the append-related configuration below is version-sensitive —
 * confirm against the cluster's Hadoop version. Appending requires the cluster
 * to allow it, and the replace-datanode-on-failure settings presumably exist to
 * keep appends working on small clusters with few datanodes; verify.
 */
public void appendTest() throws Exception {
		String localSrc = "D:/merge.txt";
		String dst = "hdfs://master:9000/input/merge.txt";
//		System.setProperty( "hadoop.home.dir", "E:/Eclipse/eclipse/hadoop2.6_Win_x64-master" );
		InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
		Configuration conf = new Configuration();
		// Enable append support on the client side.
		conf.setBoolean( "dfs.support.append", true );
		// Pipeline-recovery policy while appending — TODO confirm these values
		// are appropriate for the target cluster size.
		conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER"); 
		conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true"); 
		FileSystem fs = FileSystem.get(URI.create(dst), conf);
		OutputStream out = null;
		
		try {
			// Open dst for append with a 4096-byte buffer and a progress callback.
			out = fs.append(new Path(dst), 4096, new Progressable(){
				public void progress() {
					System.out.println(".");
				}
			});
			
			// true: copyBytes closes both streams when the copy finishes;
			// the closeStream calls in finally are then no-ops on the happy path.
			IOUtils.copyBytes(in, out, 4096, true);
			System.out.println("..");
		} finally {
			IOUtils.closeStream(in);
			IOUtils.closeStream(out);
		}
		
	}

10.合并多个文件上传

/**
 * Merges every file under a local directory (D:/Test) into a single file on
 * HDFS, concatenating their bytes in listing order.
 */
public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem hdfs = FileSystem.get(conf);          // destination: HDFS
		FileSystem fs = FileSystem.getLocal(conf);       // source: local filesystem

		Path inputDir = new Path("D:/Test");
		Path hdfsFile = new Path("hdfs://master:9000/input/test.txt");

		try {
			FileStatus[] inputFiles = fs.listStatus(inputDir);
			// try-with-resources: the original leaked the output stream (and each
			// input stream) when an exception occurred mid-copy.
			try (FSDataOutputStream out = hdfs.create(hdfsFile)) {
				for (int i = 0; i < inputFiles.length; i++) {
					System.out.println(inputFiles[i].getPath().getName());
					try (FSDataInputStream in = fs.open(inputFiles[i].getPath())) {
						byte[] buffer = new byte[256];
						int bytesRead;
						// != -1 is the InputStream EOF contract; the original's
						// "> 0" would mis-treat a legal 0-byte read as EOF.
						while ((bytesRead = in.read(buffer)) != -1) {
							out.write(buffer, 0, bytesRead);
						}
					}
				}
			}
		} catch (IOException e){
			e.printStackTrace();
		}
	}
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值