Notes - Hadoop - HDFS Client Operations

HDFS Client Environment Setup

Prepare the Hadoop JARs
Copy the compiled Hadoop package that matches your operating system
Extract it to a path containing no Chinese characters (e.g. D:\hadoop-2.7.2)
Set the HADOOP_HOME environment variable
HADOOP_HOME
D:\hadoop-2.7.2
Add it to the Path environment variable
%HADOOP_HOME%\bin;
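A throwaway sketch to confirm the variable is visible to a newly started JVM (the class name EnvCheck is illustrative, not from the original notes):

	public class EnvCheck {
		public static void main(String[] args) {
			// Expect: HADOOP_HOME = D:\hadoop-2.7.2
			System.out.println("HADOOP_HOME = " + System.getenv("HADOOP_HOME"));
		}
	}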
Create a Maven project
Create the HDFS project in Eclipse
Import the required dependencies
Add the following to pom.xml:
<dependencies>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.apache.logging.log4j</groupId>
			<artifactId>log4j-core</artifactId>
			<version>2.8.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
</dependencies>
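Note: RELEASE is a Maven meta-version that resolves to junit's latest released version; pinning an explicit version such as 4.12 makes the build more reproducible.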
Configure log output
Create a file named log4j.properties under the project's src/main/resources directory
with the following contents:
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
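These settings use the Log4j 1.x properties format, which is what the Hadoop 2.7.2 client actually reads: Hadoop logs through SLF4J with a Log4j 1.2 binding, so the log4j-core 2.8.2 artifact in the pom above is not involved in Hadoop's own logging.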

HDFS API Operations
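
The test methods in the remaining sections are shown as standalone snippets. A minimal sketch of the surrounding JUnit test class and the imports they rely on (the class name HdfsClientTest is illustrative, not from the original notes):

	import java.io.File;
	import java.io.FileInputStream;
	import java.io.FileOutputStream;
	import java.io.IOException;
	import java.net.URI;
	import java.net.URISyntaxException;
	
	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.BlockLocation;
	import org.apache.hadoop.fs.FSDataInputStream;
	import org.apache.hadoop.fs.FSDataOutputStream;
	import org.apache.hadoop.fs.FileStatus;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.LocatedFileStatus;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.fs.RemoteIterator;
	import org.apache.hadoop.io.IOUtils;
	import org.junit.Test;
	
	// Each @Test method from the sections below goes inside this class.
	// Note: the two getFileFromHDFS methods would need distinct names if
	// they were placed in the same class.
	public class HdfsClientTest {
		// ... test methods ...
	}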

HDFS File Upload
	// Upload a file
	@Test
	public void testCopyFromLocalFile() throws IOException, InterruptedException, URISyntaxException {
		
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Call the upload API
		fs.copyFromLocalFile(new Path("e:/test.txt"), new Path("/user/liun/shangchuan.txt"));
		
		// 3 Close resources
		fs.close();
		System.out.println("Upload finished");
	}
HDFS File Download
	// Download a file
	@Test
	public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
		
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Call the download API
		fs.copyToLocalFile(new Path("/user/liun/shangchuan.txt"), new Path("f:/"));
		
		// 3 Close resources
		fs.close();
		System.out.println("Download finished");
	}
HDFS File Rename
	// Rename a file
	@Test
	public void testRename() throws IOException, InterruptedException, URISyntaxException {
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Rename the file
		fs.rename(new Path("/user/liun/shangchuan.txt"), new Path("/user/liun/test.txt"));
		
		// 3 Close resources
		fs.close();
		System.out.println("Rename finished");
	}
HDFS Directory Deletion
	// Delete a directory
	@Test
	public void testDelete() throws IOException, InterruptedException, URISyntaxException {
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Delete recursively
		fs.delete(new Path("/user/liun/"), true);
		
		// 3 Close resources
		fs.close();
		System.out.println("Delete finished");
	}
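The second argument of delete is the recursive flag: true is required to remove a non-empty directory, otherwise the call throws an IOException. The method also returns a boolean success indicator, which these notes ignore.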
HDFS File Details
	@Test
	public void testListFiles() throws IOException, InterruptedException, URISyntaxException {
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 List file details recursively from the root
		RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
		
		while (listFiles.hasNext()) {
			LocatedFileStatus fileStatus = listFiles.next();
			// File name
			System.out.println(fileStatus.getPath().getName());
			// Permissions
			System.out.println(fileStatus.getPermission());
			// Length in bytes
			System.out.println(fileStatus.getLen());
			// Block locations
			BlockLocation[] blockLocations = fileStatus.getBlockLocations();
			
			for (BlockLocation blockLocation : blockLocations) {
				String[] hosts = blockLocation.getHosts();
				for (String host : hosts) {
					System.out.println(host);
				}
			}
			System.out.println("----------------------");
		}
		
		// 3 Close resources
		fs.close();
	}
HDFS File vs. Directory Check
	@Test
	public void testListStatus() throws IOException, InterruptedException, URISyntaxException {
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Check each entry under the root
		FileStatus[] listStatus = fs.listStatus(new Path("/"));
		for (FileStatus fileStatus : listStatus) {
			
			if (fileStatus.isFile()) {
				// Regular file
				System.out.println("File: " + fileStatus.getPath().getName());
			} else {
				// Directory
				System.out.println("Directory: " + fileStatus.getPath().getName());
			}
		}
		
		// 3 Close resources
		fs.close();
	}

HDFS I/O Stream Operations

HDFS File Upload
	// Upload a local file to HDFS through raw streams
	@Test
	public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
		
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Open the input stream on the local file
		FileInputStream fis = new FileInputStream("E:/input/test.dat");
		
		// 3 Open the output stream on HDFS
		FSDataOutputStream fos = fs.create(new Path("/user/liun/test.txt"));
		
		// 4 Copy the stream
		IOUtils.copyBytes(fis, fos, conf);
		
		// 5 Close resources
		IOUtils.closeStream(fos);
		IOUtils.closeStream(fis);
		fs.close();
	}
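For reference, the Configuration argument lets IOUtils.copyBytes pick its copy buffer size from the io.file.buffer.size property (4096 bytes by default); the copy runs until EOF.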
HDFS File Download
	// Download a file from HDFS to the local disk through raw streams
	@Test
	public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
		
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Open the input stream on HDFS
		FSDataInputStream fis = fs.open(new Path("/user/liun/test.txt"));
		
		// 3 Open the output stream on the local file
		FileOutputStream fos = new FileOutputStream(new File("E:/input/test.txt"));

		// 4 Copy the stream
		IOUtils.copyBytes(fis, fos, conf);
		
		// 5 Close resources
		IOUtils.closeStream(fos);
		IOUtils.closeStream(fis);
		fs.close();
	}
Seek-Based File Reading

Requirement: read a large file on HDFS one block at a time

Download the first block

	// Download the first block
	@Test
	public void readFileSeek1() throws IOException, InterruptedException, URISyntaxException {
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Open the input stream on HDFS
		FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
		
		// 3 Open the output stream on the local part file
		FileOutputStream fos = new FileOutputStream(new File("e:/hadoop-2.7.2.tar.gz.part1"));
		
		// 4 Copy only the first 128 MB
		byte[] buf = new byte[1024];
		long bytesToCopy = 1024L * 1024 * 128;
		long copied = 0;
		while (copied < bytesToCopy) {
			int len = fis.read(buf, 0, (int) Math.min(buf.length, bytesToCopy - copied));
			if (len == -1) {
				break; // file shorter than 128 MB
			}
			fos.write(buf, 0, len); // write only the bytes actually read
			copied += len;
		}
		
		// 5 Close resources
		IOUtils.closeStream(fos);
		IOUtils.closeStream(fis);
		fs.close();
	}
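The loop tracks the number of bytes copied because a single read call may return fewer than buf.length bytes; writing only the bytes actually read and stopping at exactly 128 MB keeps part1 aligned with the seek(1024*1024*128) used for the second part below.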

Download the second block

	// Download the second block
	@Test
	public void readFileSeek2() throws IOException, InterruptedException, URISyntaxException {
		// 1 Get the FileSystem object
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		// 2 Open the input stream on HDFS
		FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
		
		// 3 Seek past the first 128 MB
		fis.seek(1024 * 1024 * 128);
		
		// 4 Open the output stream on the local part file
		FileOutputStream fos = new FileOutputStream(new File("e:/hadoop-2.7.2.tar.gz.part2"));
		
		// 5 Copy the rest of the stream
		IOUtils.copyBytes(fis, fos, conf);
		
		// 6 Close resources
		IOUtils.closeStream(fos);
		IOUtils.closeStream(fis);
		fs.close();
	}
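The offset 1024*1024*128 matches the default HDFS block size in Hadoop 2.x (128 MB), so the second part begins exactly where the first block ends.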

Merge the Files
In a Windows cmd prompt:

type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1

After merging, rename hadoop-2.7.2.tar.gz.part1 to hadoop-2.7.2.tar.gz. Extracting it shows the tar archive is complete and intact.
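
For completeness, the same merge can be done portably in Java (a sketch assuming both part files sit in e:/ under the names above; the class name MergeParts is illustrative):

	import java.io.FileInputStream;
	import java.io.FileOutputStream;
	import java.io.IOException;
	
	public class MergeParts {
		public static void main(String[] args) throws IOException {
			try (FileOutputStream out = new FileOutputStream("e:/hadoop-2.7.2.tar.gz")) {
				append("e:/hadoop-2.7.2.tar.gz.part1", out);
				append("e:/hadoop-2.7.2.tar.gz.part2", out);
			}
		}
	
		// Copy one part file into the already-open output stream
		private static void append(String part, FileOutputStream out) throws IOException {
			try (FileInputStream in = new FileInputStream(part)) {
				byte[] buf = new byte[8192];
				int len;
				while ((len = in.read(buf)) != -1) {
					out.write(buf, 0, len);
				}
			}
		}
	}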
