0 Preface
It took me about two days to finally figure out how to use the Hadoop Java API on Linux, so here are some notes for future reference. The environment is:
Hadoop: 2.5.1
Linux: Ubuntu Kylin
Eclipse: Luna
1 Steps
First, download a copy of Eclipse; here I am using Luna (a nice name, too). It turns out Luna ships with Maven support built in, which is great. So create a Maven project and change pom.xml to the following content:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>maven</groupId>
<artifactId>maven</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>maven</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-maven-plugins</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.5.1</version>
</dependency>
</dependencies>
</project>
Then wait while Eclipse/Maven downloads the dependencies. Once that is done, the next step is to configure the JVM run arguments, because the Hadoop native libraries are needed at runtime. My Hadoop installation is under /home/hadoop-master/hadoop-2.5.1, so the VM argument is:
-Djava.library.path=/home/hadoop-master/hadoop-2.5.1/lib/native
Hadoop 2.5.1 already ships with the native libraries pre-built, so there is no need to compile them yourself (one more reason to use a recent release; building them by hand is a hassle). At this point the setup is done.
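If you want to double-check that the VM argument is actually picked up, here is a minimal sketch you can run with the same argument. Only org.apache.hadoop.util.NativeCodeLoader comes from Hadoop; the class name NativeLibCheck is my own choice.
import org.apache.hadoop.util.NativeCodeLoader;

public class NativeLibCheck {
    public static void main(String[] args) {
        // print what the JVM actually received for java.library.path
        System.out.println("java.library.path = " + System.getProperty("java.library.path"));
        // true only if libhadoop was found and loaded from that path
        System.out.println("native hadoop loaded: " + NativeCodeLoader.isNativeCodeLoaded());
    }
}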
2 Test Code
The proof of the pudding is in the eating, so let's write a small program and run it.
package maven.maven;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
public class HadoopFSOperations {
private static Configuration conf = new Configuration();
private static final String HADOOP_URL="hdfs://192.168.190.129:9000";
private static FileSystem fs;
private static DistributedFileSystem hdfs;
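// one-time setup: point the default FileSystem at the NameNode; the cast to DistributedFileSystem
// works because HADOOP_URL is an hdfs:// URI and gives access to HDFS-specific calls such as getDataNodeStats()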
static {
try {
FileSystem.setDefaultUri(conf, HADOOP_URL);
fs = FileSystem.get(conf);
hdfs = (DistributedFileSystem)fs;
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* List the host names of all DataNodes in the HDFS cluster
*/
public void listDataNodeInfo() {
try {
DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
String[] names = new String[dataNodeStats.length];
System.out.println("List of all the datanode in the HDFS cluster:");
for (int i=0;i<names.length;i++) {
names[i] = dataNodeStats[i].getHostName();
System.out.println(names[i]);
}
System.out.println(hdfs.getUri().toString());
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Check whether a file exists on HDFS
*/
public void checkFileExist() {
try {
Path a= hdfs.getHomeDirectory();
System.out.println("main path:"+a.toString());
Path f = new Path("/user/xxx/input01/");
boolean exist = fs.exists(f);
System.out.println("Whether exist of this file:"+exist);
// delete the file
// if (exist) {
// boolean isDeleted = hdfs.delete(f, false);
// if(isDeleted) {
// System.out.println("Delete success");
// }
// }
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Create a file on HDFS and write some content to it
*/
public void createFile() {
try {
Path f = new Path("/user/xxx/input02/file01");
System.out.println("Create and Write :"+f.getName()+" to hdfs");
FSDataOutputStream os = fs.create(f, true);
Writer out = new OutputStreamWriter(os, "utf-8");// write in UTF-8 so the content does not get garbled
out.write("你好 good job");
out.close();
os.close();
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Copy a local file to HDFS<br>
* Make sure the file encoding is UTF-8 throughout, local -> HDFS
*/
public void copyFileToHDFS() {
try {
Path f = new Path("/user/xxx/input02/file01");
File file = new File("/home/hadoop-master/hadoopTest/temporary.txt");// local source file to upload; this path is only an example, adjust it to a file on your own machine
FileInputStream is = new FileInputStream(file);
InputStreamReader isr = new InputStreamReader(is, "utf-8");
BufferedReader br = new BufferedReader(isr);
FSDataOutputStream os = fs.create(f, true);
Writer out = new OutputStreamWriter(os, "utf-8");
String str = "";
while((str=br.readLine()) != null) {
out.write(str+"\n");
}
br.close();
isr.close();
is.close();
out.close();
os.close();
System.out.println("Write content of file "+file.getName()+" to hdfs file "+f.getName()+" success");
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Get the hosts that store the blocks of a file
*/
public void getLocation() {
try {
Path f = new Path("/user/xxx/input02/file01");
FileStatus fileStatus = fs.getFileStatus(f);
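// ask for the DataNodes that hold each block of the file over its whole byte range [0, len)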
BlockLocation[] blkLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
for (BlockLocation currentLocation : blkLocations) {
String[] hosts = currentLocation.getHosts();
for (String host : hosts) {
System.out.println(host);
}
}
// get the last modification time
long modifyTime = fileStatus.getModificationTime();
Date d = new Date(modifyTime);
System.out.println(d);
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* Read the content of a file from HDFS
*/
public void readFileFromHdfs() {
try {
Path f = new Path("/user/xxx/input02/file01");
FSDataInputStream dis = fs.open(f);
InputStreamReader isr = new InputStreamReader(dis, "utf-8");
BufferedReader br = new BufferedReader(isr);
String str = "";
while ((str = br.readLine()) !=null) {
System.out.println(str);
}
br.close();
isr.close();
dis.close();
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* list all file/directory
* @param args
* @throws IOException
* @throws IllegalArgumentException
* @throws FileNotFoundException
*/
public void listFileStatus(String path) throws FileNotFoundException, IllegalArgumentException, IOException {
FileStatus fileStatus[]=fs.listStatus(new Path(path));
int listlength=fileStatus.length;
for (int i=0 ;i<listlength ;i++){
if (fileStatus[i].isDirectory() == false) {
System.out.println("filename:"
+ fileStatus[i].getPath().getName() + "\tsize:"
+ fileStatus[i].getLen());
} else {
String newpath = fileStatus[i].getPath().toString();
listFileStatus(newpath);
}
}
}
public static void main(String[] args) {
HadoopFSOperations a = new HadoopFSOperations();
a.listDataNodeInfo();
// a.checkFileExist();
// a.createFile();
// a.copyFileToHDFS();
// a.getLocation();
// a.readFileFromHdfs();
try {
a.listFileStatus(HADOOP_URL+"/user");
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IllegalArgumentException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
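One note on configuration: the NameNode address above is wired in through FileSystem.setDefaultUri in the static block. As a rough alternative sketch (my own variant, not what the class above does), the same thing can be expressed with the standard fs.defaultFS key:
Configuration conf = new Configuration();
// equivalent to FileSystem.setDefaultUri(conf, HADOOP_URL); point it at your own NameNode
conf.set("fs.defaultFS", "hdfs://192.168.190.129:9000");
FileSystem fs = FileSystem.get(conf);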
Since my Hadoop NameNode runs on 192.168.190.129, I use private static final String HADOOP_URL="hdfs://192.168.190.129:9000"; adjust this to your own cluster. Run the program and you should see something like the following:
List of all the datanode in the HDFS cluster:
hadoopslaver0
hadoopslaver2
hadoopslaver1
hdfs://192.168.190.129:9000
filename:TrustCom2015_CFP.pdf size:290401
filename:jd.PNG size:16647
You can see the three DataNodes hadoopslaver0, hadoopslaver1 and hadoopslaver2, as well as the files that were placed under /user beforehand. The little experiment is a success.
3 Summary
Following the steps above, the Hadoop Java API can be used on Linux. The journey of a thousand miles has finally begun with this first step.