8. HDFS Java API Operations (Eclipse): Create/Upload/Download/Delete Files and Directories, Query a File's Attribute and Block Info, Traverse Files and Directories, Ranged and Per-Block Downloads

1. Create/Upload/Download/Delete Files and Directories

package test;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSTest2 {
	FileSystem fs = null;
	@Before
	public void before() throws Exception {
		URI uri = new URI("hdfs://hadoop01:9000");
		Configuration conf = new Configuration();
		String user = "hadoop";
		fs = FileSystem.get(uri , conf , user);
	}
	/*
	@Test
	// create a directory
	public void mkdir() throws Exception {
		Path f = new Path("/aa");
		boolean bool = fs.mkdirs(f);
		System.out.println(bool);
	}
	*/
	/*
	// upload a file
	@Test
	public void copyFromLocal() throws Exception {
		Path src = new Path("E:/a.txt");
		Path dst = new Path("/aa/a.txt");
		fs.copyFromLocalFile(src, dst);
		System.out.println("upload finished");
	}
	*/
	/*
	@Test
	// delete a file or directory (true = recursive)
	public void delete() throws Exception {
		Path f = new Path("/aa");
		fs.delete(f, true);
		System.out.println("delete finished");
	}
	*/
	/*
	// download a file; the booleans are delSrc and useRawLocalFileSystem
	@Test
	public void copyToLocal() throws Exception {
		Path src = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
		Path dst = new Path("E:/hadoop-2.7.7-centos-6.7.tar.gz");
		fs.copyToLocalFile(false, src, dst, true);
	}
	*/

	@Test
	// download a file with get-style streaming access
	public void get() throws IllegalArgumentException, IOException {
		// input stream from the HDFS file system (input stream: reads the file)
		Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
		FSDataInputStream in = fs.open(f);
		// output stream to the local file system
		FileOutputStream out = new FileOutputStream("E:/hadoop-2.7.7-centos-6.7.tar.gz");
		// copy with a 4096-byte buffer
		IOUtils.copyBytes(in, out, 4096);
		System.out.println("download finished");
		// close the streams
		in.close();
		out.close();
	}
	@Test
	// upload a file with put-style streaming access
	public void put() throws Exception {
		// input stream from the local file system
		FileInputStream in = new FileInputStream("E:/a.txt");
		// output stream to the HDFS file system (output stream: writes the file)
		Path f = new Path("/a.txt");
		FSDataOutputStream out = fs.create(f);
		// copy with a 4096-byte buffer
		IOUtils.copyBytes(in, out, 4096);
		System.out.println("upload finished");
		in.close();
		out.close();
	}
	
	@After
	public void after() throws IOException {
		fs.close();	
	}
}
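
The upload helpers above overwrite an existing target silently. Below is a minimal companion sketch, not part of the original tests, that checks the destination with fs.exists() and moves an old copy aside with fs.rename() before uploading; the paths and the backup name are assumptions mirroring the examples above.

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch (assumption, not from the original post): back up an existing
// target before uploading over it. Paths follow the examples above.
public class HDFSExistsSketch {
	public static void main(String[] args) throws Exception {
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"),
				new Configuration(), "hadoop");
		Path dst = new Path("/aa/a.txt");
		if (fs.exists(dst)) {
			// move the old copy aside instead of silently overwriting it
			fs.rename(dst, new Path("/aa/a.txt.bak"));
		}
		fs.copyFromLocalFile(new Path("E:/a.txt"), dst);
		fs.close();
	}
}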

2. Querying a Single File's Attribute and Block Info

package test;

import java.io.IOException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSTest3 {
	FileSystem fs = null;
	@Before
	// open the connection to HDFS
	public void before() throws Exception {
		URI uri = new URI("hdfs://hadoop01:9000");
		Configuration conf = new Configuration();
		conf.set("dfs.replication", "2");
		String user = "hadoop";
		fs = FileSystem.get(uri, conf, user);
	}
	@Test
	// get a single file's attribute info and block info
	public void fileStatus() throws Exception {
		Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
		FileStatus fileStatus = fs.getFileStatus(f);
		// attribute info
		System.out.println(fileStatus.getPath());
		System.out.println(fileStatus.getLen());
		System.out.println(fileStatus.isDirectory());
		System.out.println(fileStatus.getBlockSize());
		System.out.println(fileStatus.getReplication());
		System.out.println(fileStatus.getGroup());
		System.out.println(fileStatus.isSymlink());
		System.out.println(fileStatus.getOwner());
		System.out.println(fileStatus.getPermission());
		System.out.println(fileStatus.getModificationTime());
		long modificationTime = fileStatus.getModificationTime();
		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		String sd = sdf.format(new Date(modificationTime)); // convert the epoch timestamp to a readable date
		System.out.println("formatted: " + sd);
		System.out.println(fileStatus.getAccessTime());
		long accessTime = fileStatus.getAccessTime();
		String sd1 = sdf.format(new Date(accessTime)); // convert the epoch timestamp to a readable date
		System.out.println("formatted: " + sd1);
		// block info
		Path file = fileStatus.getPath();
		long start = 0;
		long len = fileStatus.getLen();
		BlockLocation[] blockLocations = fs.getFileBlockLocations(file, start, len);
		System.out.println(blockLocations.length);
		System.out.println(blockLocations[0]);
	}
	@After
	// close the connection
	public void after() throws Exception {
		fs.close();	
	}	
}
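
getFileStatus() describes one path at a time. As a hedged companion sketch (not from the original post), fs.getContentSummary() aggregates total length and file/directory counts for an entire subtree; the URI and user below mirror the tests above.

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch (assumption): subtree totals via getContentSummary(), the
// aggregate counterpart of the per-file getFileStatus() call above.
public class HDFSSummarySketch {
	public static void main(String[] args) throws Exception {
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"),
				new Configuration(), "hadoop");
		ContentSummary cs = fs.getContentSummary(new Path("/"));
		System.out.println("length=" + cs.getLength());           // total bytes in the subtree
		System.out.println("fileCount=" + cs.getFileCount());     // number of files
		System.out.println("directoryCount=" + cs.getDirectoryCount());
		fs.close();
	}
}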

3. Traversing Files and Their Attribute and Block Info

package test;

import java.net.URI;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSTest4 {
	FileSystem fs = null;
	@Before
	public void before() throws Exception {	
		URI uri = new URI("hdfs://hadoop01:9000");
		Configuration conf = new Configuration();
		conf.set("dfs.replication", "2");
		String user = "hadoop";
		fs = FileSystem.get(uri, conf, user);
	}
	@Test
	// traverse files with fs.listFiles(path, recursive); true means recurse into subdirectories
	public void listFile() throws Exception {
		Path f = new Path("/");
		// get an iterator over all files under the path
		RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(f, true);
		// walk the iterator
		while (iterator.hasNext()) {
			// LocatedFileStatus is a FileStatus that also carries the file's block locations
			LocatedFileStatus status = iterator.next();
			System.out.println("Path=" + status.getPath());
			System.out.println("Owner=" + status.getOwner());
			System.out.println("Len=" + status.getLen());
			System.out.println("Permission=" + status.getPermission());
			System.out.println("Replication=" + status.getReplication());
			System.out.println("BlockSize=" + status.getBlockSize());
			System.out.println("BlockLocations=" + Arrays.toString(status.getBlockLocations()));
			System.out.println("Block-sum=" + status.getBlockLocations().length);
			System.out.println("++++++");
			// print each block's info
			BlockLocation[] blockLocations = status.getBlockLocations();
			for (int i = 0; i < blockLocations.length; i++) {
				System.out.println("Block-count=" + (i + 1));
				System.out.println("blockLocations=" + blockLocations[i]);
				System.out.println("Hosts=" + Arrays.toString(blockLocations[i].getHosts()));
				System.out.println("Names=" + Arrays.toString(blockLocations[i].getNames()));
				System.out.println("Offset=" + blockLocations[i].getOffset());
				System.out.println("Length=" + blockLocations[i].getLength());
			}
			System.out.println("======================================");
		}
	}
	@After
	public void after() throws Exception {
		fs.close();
	}
}
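
Note that listFiles() yields files only, never directories. The sketch below is an assumed alternative using fs.listStatus(), which also returns directories, so recursing over it reproduces a full tree listing.

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch (assumption, not from the original post): recursive tree walk
// with listStatus(), which, unlike listFiles(), also returns directories.
public class HDFSListStatusSketch {
	static void walk(FileSystem fs, Path p) throws Exception {
		for (FileStatus status : fs.listStatus(p)) {
			System.out.println((status.isDirectory() ? "dir  " : "file ")
					+ status.getPath());
			if (status.isDirectory()) {
				walk(fs, status.getPath()); // recurse into subdirectories
			}
		}
	}

	public static void main(String[] args) throws Exception {
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"),
				new Configuration(), "hadoop");
		walk(fs, new Path("/"));
		fs.close();
	}
}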

4. Ranged (Specified Offset and Length) Download and Per-Block Download

// Download part of a file's content by specifying a start offset and a length
package test;

import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSTest5 {
	FileSystem fs = null;
	@Before
	public void before() throws  Exception {
		URI uri = new URI("hdfs://hadoop01:9000");
		Configuration conf = new Configuration();
		conf.set("dfs.replication", "2");
		String user = "hadoop";
		fs = FileSystem.get(uri, conf, user);
	}
	@Test
	// download a given number of bytes starting at a given offset
	public void get() throws IOException {
		// input stream from the HDFS file system (input stream: reads the file)
		Path f = new Path("/a.txt");
		FSDataInputStream in = fs.open(f);
		// output stream to the local file system
		FileOutputStream out = new FileOutputStream("e:/b.txt");
		in.seek(6); // start reading at byte offset 6
		long count = 20L; // number of bytes to copy; in the 4096 variant below, 4096 is only a buffer size and the whole stream is copied
		boolean close = false; // whether copyBytes should close the streams; here we close them explicitly below
		//IOUtils.copyBytes(in, out, 4096); // download everything
		IOUtils.copyBytes(in, out, count, close); // copy just the requested range
		System.out.println("download finished");
		// close the streams
		in.close();
		out.close();
	}
	@After
	public void after() throws IOException {
		fs.close();
	}
}
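
The seek() + copyBytes() pattern above moves the stream's own read offset. FSDataInputStream also supports a positioned read that leaves the stream offset untouched; the sketch below is an assumed variant reading the same 20 bytes at offset 6 from /a.txt.

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch (assumption): positioned read as an alternative to seek();
// path and byte range follow the ranged-download example above.
public class HDFSPreadSketch {
	public static void main(String[] args) throws Exception {
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"),
				new Configuration(), "hadoop");
		FSDataInputStream in = fs.open(new Path("/a.txt"));
		byte[] buf = new byte[20];
		// read up to 20 bytes starting at file offset 6,
		// without moving the stream's own read pointer
		int n = in.read(6L, buf, 0, buf.length);
		if (n > 0) {
			System.out.println(new String(buf, 0, n));
		}
		in.close();
		fs.close();
	}
}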
// Per-block HDFS download, method 1:
package test;

import java.io.FileOutputStream;
import java.net.URI;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSTest6 {
	FileSystem fs = null;
	@Before
	public void before() throws Exception {	
		URI uri = new URI("hdfs://hadoop01:9000");
		Configuration conf = new Configuration();
		conf.set("dfs.replication", "2");
		String user = "hadoop";
		fs = FileSystem.get(uri, conf, user);
	}
	@Test
	// traverse with fs.listFiles(path, recursive); on a single file it yields one entry
	public void listFile() throws Exception {
		Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
		// get an iterator (here over the single file)
		RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(f, true);
		while (iterator.hasNext()) {
			// LocatedFileStatus is a FileStatus that also carries block locations
			LocatedFileStatus status = iterator.next();
			System.out.println("BlockLocations=" + Arrays.toString(status.getBlockLocations()));
			System.out.println("Block-sum=" + status.getBlockLocations().length);

			// walk the blocks and download the second one
			BlockLocation[] blockLocations = status.getBlockLocations();
			for (int i = 0; i < blockLocations.length; i++) {
				System.out.println("---------------------------------");
				System.out.println("Block-count=" + i);
				System.out.println("blockLocations=" + blockLocations[i]);
				System.out.println("Offset=" + blockLocations[i].getOffset());
				System.out.println("Length=" + blockLocations[i].getLength());
				// if a second block exists, download it
				if (i == 1) {
					// input stream from the HDFS file system
					FSDataInputStream in = fs.open(f);
					// output stream to the local file system
					FileOutputStream out = new FileOutputStream("e:/hadoop-2.7.7-centos-6.7.tar.gz");
					in.seek(blockLocations[i].getOffset()); // jump to the block's offset
					long count = blockLocations[i].getLength();
					IOUtils.copyBytes(in, out, count, true); // close=true: copyBytes closes both streams
					System.out.println("download finished");
				}
			}
		}
	}
	@After
	public void after() throws Exception {
		fs.close();
	}
}
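
Method 1 only downloads the block at index 1. A natural generalization, sketched below under the same connection settings, loops over every block and writes each one to its own local part file; the part-file names (e:/part-0, e:/part-1, ...) are assumptions.

package test;

import java.io.FileOutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Sketch (my generalization of method 1, an assumption): download every
// block of the file into its own local part file.
public class HDFSAllBlocksSketch {
	public static void main(String[] args) throws Exception {
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"),
				new Configuration(), "hadoop");
		Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
		FileStatus status = fs.getFileStatus(f);
		BlockLocation[] blocks =
				fs.getFileBlockLocations(status.getPath(), 0, status.getLen());
		for (int i = 0; i < blocks.length; i++) {
			FSDataInputStream in = fs.open(f);
			FileOutputStream out = new FileOutputStream("e:/part-" + i);
			in.seek(blocks[i].getOffset());                          // jump to this block
			IOUtils.copyBytes(in, out, blocks[i].getLength(), true); // close=true closes both streams
		}
		fs.close();
	}
}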
// Per-block HDFS download, method 2:
package test;

import java.io.FileOutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSTest7 {
	FileSystem fs = null;
	@Before
	public void before() throws Exception {	
		URI uri = new URI("hdfs://hadoop01:9000");
		Configuration conf = new Configuration();
		conf.set("dfs.replication", "2");
		String user = "hadoop";
		fs = FileSystem.get(uri, conf, user);
	}
	@Test
	// download a specific block of the file
	public void getBlock() throws Exception {
		// basic file info
		Path f = new Path("/hadoop-2.7.7-centos-6.7.tar.gz");
		FileStatus fileStatus = fs.getFileStatus(f);
		// block info for the whole file
		Path file = fileStatus.getPath();
		long start = 0;
		long len = fileStatus.getLen();
		BlockLocation[] blockLocations = fs.getFileBlockLocations(file, start, len);
		// make sure a second block exists
		if (blockLocations.length < 2) {
			throw new RuntimeException("the file has fewer than 2 blocks");
		} else {
			// offset of the second block
			long offset = blockLocations[1].getOffset();
			// length of the second block
			long length = blockLocations[1].getLength();
			// build the input and output streams
			FSDataInputStream in = fs.open(f);
			FileOutputStream out = new FileOutputStream("e:/cc");
			// position the stream at the block's offset
			in.seek(offset);
			// copy exactly one block; close=true lets copyBytes close both streams
			IOUtils.copyBytes(in, out, length, true);
			System.out.println("block 2 download finished");
		}
	}
	@After
	public void after() throws Exception {
		fs.close();
	}
}
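
To check that per-block downloads really partition the file, the part files can be stitched back together locally with plain java.io; the merged file should be byte-identical to the original. The part-file names below follow the loop sketch above and are assumptions.

package test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

// Sketch (assumption): concatenate the local part files produced by the
// per-block downloads; the result should match the original byte for byte.
public class MergePartsSketch {
	public static void main(String[] args) throws IOException {
		FileOutputStream out = new FileOutputStream("e:/hadoop-merged.tar.gz");
		for (int i = 0; ; i++) {
			File part = new File("e:/part-" + i);
			if (!part.exists()) {
				break; // stop at the first missing part
			}
			FileInputStream in = new FileInputStream(part);
			byte[] buf = new byte[4096];
			int n;
			while ((n = in.read(buf)) > 0) {
				out.write(buf, 0, n);
			}
			in.close();
		}
		out.close();
		System.out.println("merge finished");
	}
}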

 
