笔记-hadoop-HDFS客户端操作

最新推荐文章于 2022-09-04 03:47:09 发布

liu_1221

最新推荐文章于 2022-09-04 03:47:09 发布

阅读量126

点赞数

分类专栏：笔记-hadoop

本文链接：https://blog.csdn.net/liu_1221/article/details/95894511

版权

笔记-hadoop 专栏收录该内容

18 篇文章 0 订阅

订阅专栏

HDFS客户端环境准备

准备hadoop jar

根据自己电脑的操作系统拷贝对应的编译后的hadoop jar包

解压到非中文路径（D:\hadoop-2.7.2）

配置HADOOP_HOME环境变量

HADOOP_HOME

D:\hadoop-2.7.2

配置Path环境变量

%HADOOP_HOME%\bin;

创建一个Maven工程

eclipse创建HDFS工程

导入相应的依赖

pom.xml文件中添加

<!-- Dependencies for the HDFS client examples: JUnit for the @Test methods,
     the Hadoop 2.7.2 client libraries, and the JDK tools.jar. -->
<dependencies>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<!-- Pinned: the RELEASE meta-version resolves to whatever is newest
			     at build time and makes the build non-reproducible. -->
			<version>4.13.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.logging.log4j</groupId>
			<artifactId>log4j-core</artifactId>
			<!-- NOTE(review): 2.8.2 predates the fixes for CVE-2021-44228
			     (Log4Shell); consider moving to a patched 2.x release. -->
			<version>2.8.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>2.7.2</version>
		</dependency>
		<dependency>
			<!-- tools.jar is taken from the local JDK installation, hence system scope. -->
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
</dependencies>

配置日志打印

在项目的src/main/resources目录下新建一个文件log4j.properties

在文件中填入:

# Root logger: INFO level, writing only to the "stdout" appender.
log4j.rootLogger=INFO, stdout
# Console appender; pattern is date, priority, [logger category], message, newline.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# File appender writing to target/spring.log with the same pattern.
# It is defined here but not listed on log4j.rootLogger above, so the root
# logger does not write to it unless "logfile" is added to that line.
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n

HDFS的API操作

HDFS文件上传

	//文件上传
	/**
	 * Uploads the local file {@code e:/test.txt} to HDFS as
	 * {@code /user/liun/shangchuan.txt} via {@code FileSystem.copyFromLocalFile}.
	 *
	 * @throws IOException          if connecting to HDFS or copying the file fails
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void testCopyFromLocalFile() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();

		// try-with-resources releases the FileSystem even when the upload throws
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun")) {
			fs.copyFromLocalFile(new Path("e:/test.txt"), new Path("/user/liun/shangchuan.txt"));
		}
		System.out.println("上传完毕");
	}

HDFS文件下载

//文件下载
	/**
	 * Downloads {@code /user/liun/shangchuan.txt} from HDFS into the local
	 * directory {@code f:/} via {@code FileSystem.copyToLocalFile}.
	 *
	 * @throws IOException          if connecting to HDFS or copying the file fails
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();

		// try-with-resources releases the FileSystem even when the download throws
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun")) {
			fs.copyToLocalFile(new Path("/user/liun/shangchuan.txt"), new Path("f:/"));
		}
		System.out.println("下载完毕");
	}

HDFS文件名更改

	//文件更名
	/**
	 * Renames {@code /user/liun/shangchuan.txt} to {@code /user/liun/test.txt} on HDFS.
	 *
	 * @throws IOException          if connecting to HDFS fails or the rename is reported as unsuccessful
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void testRename() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();
		Path source = new Path("/user/liun/shangchuan.txt");
		Path target = new Path("/user/liun/test.txt");

		// try-with-resources releases the FileSystem even when the rename throws
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun")) {
			// rename() signals failure through its boolean result; do not report
			// success when it returned false
			if (!fs.rename(source, target)) {
				throw new IOException("rename failed: " + source + " -> " + target);
			}
		}
		System.out.println("更名完毕");
	}

HDFS文件夹删除

	//文件删除
	/**
	 * Recursively deletes the HDFS directory {@code /user/liun/}.
	 *
	 * @throws IOException          if connecting to HDFS or the delete call fails
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void testDelete() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();

		// try-with-resources releases the FileSystem even when the delete throws
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun")) {
			// second argument true = recursive; the boolean result is not checked
			fs.delete(new Path("/user/liun/"), true);
		}
		System.out.println("删除完毕");
	}

HDFS文件详情查看

	@Test
	public void testListFiles() throws IOException, InterruptedException, URISyntaxException {
		// 1获取fs对象
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
		
		//文件详情查看
		RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
		
		while (listFiles.hasNext()) {
			 LocatedFileStatus fileStatus = listFiles.next();		
			//文件名称
			 System.out.println(fileStatus.getPath().getName());		 
			//权限
			 System.out.println(fileStatus.getPermission());
			//长度
			 System.out.println(fileStatus.getLen());
			//块信息
			 BlockLocation[] blockLocations = fileStatus.getBlockLocations();
			 
			 for (BlockLocation blockLocation : blockLocations) {
				 String[] hosts = blockLocation.getHosts();
				 for (String host : hosts) {
					 System.out.println(host);
				}	
			}
			 System.out.println("----------------------");
		}

		关闭资源
		fs.close();
	}

HDFS文件和文件夹判断

	/**
	 * Lists the direct children of the HDFS root and prints each one's name,
	 * labelled as a file or as a directory.
	 *
	 * @throws IOException          if connecting to HDFS or listing fails
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void testListStatus() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();

		// try-with-resources releases the FileSystem even when listing throws
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun")) {
			for (FileStatus fileStatus : fs.listStatus(new Path("/"))) {
				String name = fileStatus.getPath().getName();
				if (fileStatus.isFile()) {
					// regular file
					System.out.println("文件：" + name);
				} else {
					// directory
					System.out.println("文件夹：" + name);
				}
			}
		}
	}

HDFS的I/O流操作

HDFS文件上传

//本地文件上传到HDFS
	/**
	 * Uploads the local file {@code E:/input/test.dat} to HDFS as
	 * {@code /user/liun/test.txt} by copying between raw I/O streams.
	 *
	 * @throws IOException          if connecting to HDFS, opening either stream, or copying fails
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();

		// Resources are closed in reverse order (HDFS output, local input, file system),
		// including when opening a later resource or the copy itself throws.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
				FileInputStream fis = new FileInputStream("E:/input/test.dat");
				FSDataOutputStream fos = fs.create(new Path("/user/liun/test.txt"))) {
			// close=false: try-with-resources owns the streams, so each is closed exactly once
			IOUtils.copyBytes(fis, fos, conf, false);
		}
	}

HDFS文件下载

//从HDFS上下载文件到本地
	/**
	 * Downloads {@code /user/liun/test.txt} from HDFS to the local file
	 * {@code E:/input/test.txt} by copying between raw I/O streams.
	 *
	 * <p>NOTE(review): this method has the same name and signature as the
	 * {@code copyToLocalFile}-based download test; the two cannot be declared
	 * in the same class.
	 *
	 * @throws IOException          if connecting to HDFS, opening either stream, or copying fails
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();

		// Resources are closed in reverse order (local output, HDFS input, file system),
		// including when opening a later resource or the copy itself throws.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
				FSDataInputStream fis = fs.open(new Path("/user/liun/test.txt"));
				FileOutputStream fos = new FileOutputStream(new File("E:/input/test.txt"))) {
			// close=false: try-with-resources owns the streams, so each is closed exactly once
			IOUtils.copyBytes(fis, fos, conf, false);
		}
	}

定位文件读取

需求：分块读取HDFS上的大文件

下载第一块

//下载第一块
	/**
	 * Downloads the first 128 MB of {@code /hadoop-2.7.2.tar.gz} from HDFS into
	 * the local file {@code e:/hadoop-2.7.2.tar.gz.part1}. If the HDFS file is
	 * shorter than 128 MB, the whole file is copied.
	 *
	 * @throws IOException          if connecting to HDFS, opening either stream, or copying fails
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void readFileSeek1() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();
		// size of the first part: 128 MB
		final long partSize = 1024L * 1024 * 128;

		// Resources are closed in reverse order, including when a later step throws.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
				FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
				FileOutputStream fos = new FileOutputStream(new File("e:/hadoop-2.7.2.tar.gz.part1"))) {
			byte[] buf = new byte[1024];
			long remaining = partSize;
			while (remaining > 0) {
				// read() may deliver fewer bytes than requested, so only the bytes
				// actually read are written; writing the whole buffer would pad the
				// output with stale data
				int read = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
				if (read < 0) {
					// end of file reached before 128 MB
					break;
				}
				fos.write(buf, 0, read);
				remaining -= read;
			}
		}
	}

下载第二块

	//下载第二块
	/**
	 * Downloads everything after the first 128 MB of {@code /hadoop-2.7.2.tar.gz}
	 * from HDFS into the local file {@code e:/hadoop-2.7.2.tar.gz.part2}.
	 *
	 * @throws IOException          if connecting to HDFS, seeking, opening either stream, or copying fails
	 * @throws InterruptedException declared by {@code FileSystem.get(URI, Configuration, String)}
	 * @throws URISyntaxException   if the NameNode URI literal cannot be parsed
	 */
	@Test
	public void readFileSeek2() throws IOException, InterruptedException, URISyntaxException {
		Configuration conf = new Configuration();

		// Resources are closed in reverse order, including when a later step throws.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), conf, "liun");
				FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"))) {
			// skip the first 128 MB; computed as long so the offset cannot overflow int
			fis.seek(1024L * 1024 * 128);

			// the local file is created only after the seek has succeeded
			try (FileOutputStream fos = new FileOutputStream(new File("e:/hadoop-2.7.2.tar.gz.part2"))) {
				// close=false: try-with-resources owns the streams, so each is closed exactly once
				IOUtils.copyBytes(fis, fos, conf, false);
			}
		}
	}

合并文件
在Windows的cmd窗口中进入两个分块文件所在的目录（本例为e:\），执行以下命令，把第二块追加到第一块的末尾：

type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1

合并完成后，将hadoop-2.7.2.tar.gz.part1重新命名为hadoop-2.7.2.tar.gz。解压发现该tar包非常完整。

liu_1221

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录