HDFS的Java api+步骤+api

最新推荐文章于 2023-11-02 15:55:44 发布

勤径苦舟

最新推荐文章于 2023-11-02 15:55:44 发布

阅读量601

点赞数

分类专栏：大数据

本文链接：https://blog.csdn.net/zhou920786312/article/details/85931850

版权

大数据专栏收录该内容

40 篇文章 0 订阅

订阅专栏

步骤

1下载hadoop源文件

2因为要在 Windows 下测试，需要先编译 hadoop 源文件；编译后下面 2 个文件夹下会多出一些内容，也就是能让 hadoop 在当前 Windows 操作系统下跑起来的文件

如下图所示（这些都是在 Windows 7 系统下编译好的；如果是 Windows 8 系统，你需要在 Windows 8 下重新编译源文件）

这个文件我已上传https://download.csdn.net/download/zhou920786312/10899379

3配置环境变量

HADOOP_HOME
H:\java\hadoop-2.6.1

path
;%HADOOP_HOME%/bin

测试代码

   /**
    * Smoke test: uploads the local file {@code H:/test/c.txt} to {@code /aaa} on HDFS.
    *
    * @param args unused
    * @throws Exception if the URI is malformed or the HDFS operation fails
    */
   public static void main(String[] args) throws Exception {
       Configuration conf = new Configuration();
       conf.set("fs.defaultFS", "hdfs://mini-yum:9000");
       // Obtain a client handle for the file system.
       // Note: the cluster was started as root, so the files are owned by root and the
       // client connects as "root" here (the author re-formatted HDFS afterwards).
       // The last argument of FileSystem.get is the user name.
       // try-with-resources closes the handle even when the upload throws.
       try (FileSystem fs = FileSystem.get(new URI("hdfs://mini-yum:9000"), conf, "root")) {
           fs.copyFromLocalFile(new Path("H:/test/c.txt"), new Path("/aaa"));
       }
   }

api的操作就是shell命令

1上传文件

2下载文件

3打印配置参数

4创建目录

5删除

6递归列出指定目录下所有子文件夹中的文件

为了测试，我创建了 /ccc/1.txt 和 /ccc/2.txt，结构如下

测试

不知道小伙伴有没有发现 b.txt、1.txt、2.txt 都没有副本（1.txt 和 2.txt 都是 b.txt 的拷贝），是因为 b.txt 没有数据吗？（确实是因为没有数据）

我们做了测试，上传一个是有数据的3.txt

结果和我们猜测一样

7查看当前路径的文件

8通过流的方式上传文件到hdfs

9通过流的方式获取hdfs上数据

10截取指定大小的文件（例如：下载文件可以指定从2字节到10字节的文件）

11显示hdfs上文件的内容

所有测试的代码

package cn.feizhou.hdfs;


import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Before;
import org.junit.Test;
/**
 * Demonstrates basic HDFS client operations through the Hadoop {@link FileSystem} API.
 *
 * <p>A client always acts under a user identity. By default the HDFS client API takes it
 * from a JVM parameter, e.g. {@code -DHADOOP_USER_NAME=hadoop}; it can also be passed
 * explicitly when the {@code FileSystem} object is obtained, which is what {@link #init()}
 * does.
 *
 * <p>The NameNode URI, the user name and all paths are hard-coded for the author's test
 * environment.
 */
public class HdfsClientDemo {
	FileSystem fs = null;
	Configuration conf = null;

	/**
	 * Creates the configuration and connects to HDFS before each test.
	 *
	 * @throws Exception if the URI is malformed or the file system cannot be obtained
	 */
	@Before
	public void init() throws Exception {
		conf = new Configuration();
		conf.set("dfs.replication", "3");
		// The URI and the user identity are passed in directly. A plain
		// FileSystem.get(conf) call used to precede this one, but its result was
		// overwritten immediately and never closed, so it has been dropped.
		fs = FileSystem.get(new URI("hdfs://mini-yum:9000"), conf, "hadoop");
	}

	/**
	 * Uploads a local file to HDFS and closes the file system handle.
	 *
	 * @throws Exception if the copy or the close fails
	 */
	@Test
	public void testUpload() throws Exception {
		try {
			fs.copyFromLocalFile(new Path("H:/test/b.txt"), new Path("/ccc/3.txt"));
		} finally {
			// Close the handle even when the upload fails.
			fs.close();
		}
	}

	/**
	 * Downloads a file from HDFS to the local disk.
	 *
	 * @throws Exception if the copy fails
	 */
	@Test
	public void testDownload() throws Exception {
		fs.copyToLocalFile(new Path("/b.txt"), new Path("H:/test/123.txt"));
	}

	/**
	 * Prints every key/value pair of the configuration to standard output.
	 */
	@Test
	public void testConf() {
		for (Entry<String, String> ent : conf) {
			System.out.println(ent.getKey() + " : " + ent.getValue());
		}
	}

	/**
	 * Creates the directory {@code /aaa/bbb} and prints the result flag.
	 *
	 * @throws Exception if the operation fails
	 */
	@Test
	public void testMkdir() throws Exception {
		boolean mkdirs = fs.mkdirs(new Path("/aaa/bbb"));
		System.out.println(mkdirs);
	}

	/**
	 * Recursively deletes {@code /a} and prints whether the deletion succeeded.
	 *
	 * @throws Exception if the operation fails
	 */
	@Test
	public void testDelete() throws Exception {
		// second argument: recursive delete
		boolean flag = fs.delete(new Path("/a"), true);
		System.out.println("是否删除成功："+flag);
	}

	/**
	 * Recursively lists the files below {@code /} and prints, for each one, its block
	 * size, owner, replication, permission, name and the location of every block.
	 *
	 * @throws Exception if the listing fails
	 */
	@Test
	public void testLs() throws Exception {
		RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
		while (listFiles.hasNext()) {
			LocatedFileStatus fileStatus = listFiles.next();
			System.out.println("blocksize: " +fileStatus.getBlockSize());
			System.out.println("owner: " +fileStatus.getOwner());
			System.out.println("Replication: " +fileStatus.getReplication());
			System.out.println("Permission: " +fileStatus.getPermission());
			System.out.println("Name: " +fileStatus.getPath().getName());
			BlockLocation[] blockLocations = fileStatus.getBlockLocations();
			for (BlockLocation b : blockLocations) {
				System.out.println("块起始偏移量: " +b.getOffset());
				System.out.println("块长度:" + b.getLength());
				// datanodes that hold this block
				String[] datanodes = b.getHosts();
				for (String dn : datanodes) {
					System.out.println("datanode:" + dn);
				}
			}
			System.out.println("------------------");
		}
	}

	/**
	 * Lists the direct children of {@code /} (non-recursive) and prints, for each entry,
	 * its name and whether it is a file or a directory.
	 *
	 * @throws Exception if the listing fails
	 */
	@Test
	public void testLs2() throws Exception {
		FileStatus[] listStatus = fs.listStatus(new Path("/"));
		for (FileStatus file : listStatus) {
			System.out.println("name: " + file.getPath().getName());
			System.out.println((file.isFile()?"file":"directory"));
		}
	}

	/**
	 * Uploads a local file to HDFS by copying between streams.
	 *
	 * @throws Exception if either stream cannot be opened or the copy fails
	 */
	@Test
	public void testUpload2() throws Exception {
		// The local source is opened first, so a missing local file fails before the
		// HDFS target is created/overwritten. Both streams are closed on exit; closing
		// the HDFS output stream is required for the written data to be flushed and
		// the file to be completed.
		try (FileInputStream inputStream = new FileInputStream("H:/test/d.txt");
				FSDataOutputStream outputStream = fs.create(new Path("/ccc/4.txt"), true)) {
			IOUtils.copy(inputStream, outputStream);
		}
	}

	/**
	 * Downloads a file from HDFS by copying between streams.
	 *
	 * @throws Exception if either stream cannot be opened or the copy fails
	 */
	@Test
	public void testDownLoad2() throws Exception {
		try (FSDataInputStream inputStream = fs.open(new Path("/ccc/4.txt"));
				FileOutputStream outputStream = new FileOutputStream("H:/test/d2.txt")) {
			IOUtils.copy(inputStream, outputStream);
		}
	}

	/**
	 * Reads part of an HDFS file using random access: seeks to offset 2 and copies
	 * everything from there to the end of the file into a local file.
	 *
	 * @throws Exception if either stream cannot be opened or the copy fails
	 */
	@Test
	public void testRandomAccess() throws Exception {
		try (FSDataInputStream inputStream = fs.open(new Path("/ccc/4.txt"));
				FileOutputStream outputStream = new FileOutputStream("H:/test/d3.txt")) {
			// skip the first two bytes; the rest of the file is copied
			inputStream.seek(2);
			IOUtils.copy(inputStream, outputStream);
		}
	}

	/**
	 * Prints the content of an HDFS file to standard output.
	 *
	 * @throws IOException if the file cannot be opened or read
	 * @throws IllegalArgumentException declared by the original signature
	 */
	@Test
	public void testCat() throws IllegalArgumentException, IOException {
		// Only the HDFS stream is closed; System.out must stay open.
		try (FSDataInputStream in = fs.open(new Path("/ccc/4.txt"))) {
			System.out.println("--------------");
			IOUtils.copy(in, System.out);
			System.out.println("--------------");
		}
	}

	/**
	 * Standalone entry point: uploads {@code H:/test/c.txt} to {@code /a} on HDFS.
	 *
	 * @param args unused
	 * @throws Exception if the URI is malformed or the HDFS operation fails
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// "fs.defaultFS" is not set on conf because the URI is passed to FileSystem.get;
		// "dfs.replication" could optionally be set on conf here as well.
		// The last argument of FileSystem.get is the user name.
		// try-with-resources closes the handle even when the upload throws.
		try (FileSystem fs = FileSystem.get(new URI("hdfs://mini-yum:9000"), conf, "hadoop")) {
			fs.copyFromLocalFile(new Path("H:/test/c.txt"), new Path("/a"));
		}
	}
}

勤径苦舟

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
HDFS的Java api+步骤+api

步骤1下载hadoop源文件2因为要在window下测试，hadoop源文件需要编译，编译后下面2个文件下多出点内容，就是一些在当前window操作系统下能让hadoop跑起来的文件如下图所示（这都是编译过的(window7系统)，如果是window8系统，你需要在win8编译下源文件）这个文件我已上传https://download.csdn.net/download/zhou9...
复制链接

扫一扫