0 Preface
It took roughly two days to finally sort out how to use the Hadoop Java API on Linux, so here is a note for future reference. The environment is as follows:
Hadoop: 2.5.1
Linux: Ubuntu Kylin
Eclipse: Luna
1 Steps
First, download Eclipse; the release used here is Luna. Nice name, too: "in the name of the moon"...
It also turns out that Luna ships with Maven built in, which is great. So create a Maven project and change its pom.xml to the following:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>maven</groupId>
  <artifactId>maven</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>maven</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-minicluster</artifactId>
      <version>2.5.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.5.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-assemblies</artifactId>
      <version>2.5.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-maven-plugins</artifactId>
      <version>2.5.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.5.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.5.1</version>
    </dependency>
  </dependencies>
</project>
One more thing: so that Hadoop's native libraries are found at runtime, add the following VM argument to the run configuration (adjust the path to your own Hadoop installation):

-Djava.library.path=/home/hadoop-master/hadoop-2.5.1/lib/native
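To double check that the native libraries are actually picked up, a quick sketch like the one below can be run first. This check is my own addition, not part of the original steps; NativeCodeLoader ships with hadoop-common, and the class name NativeLibCheck is just a placeholder.

import org.apache.hadoop.util.NativeCodeLoader;

public class NativeLibCheck {
    public static void main(String[] args) {
        // Prints true if libhadoop.so was found on java.library.path and loaded.
        System.out.println("native hadoop loaded: " + NativeCodeLoader.isNativeCodeLoaded());
    }
}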
2 Test code
The proof of the pudding is in the eating, so here is a small program to try it out.
package maven.maven;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class HadoopFSOperations {

    private static Configuration conf = new Configuration();
    private static final String HADOOP_URL = "hdfs://192.168.190.129:9000";

    private static FileSystem fs;
    private static DistributedFileSystem hdfs;

    static {
        try {
            FileSystem.setDefaultUri(conf, HADOOP_URL);
            fs = FileSystem.get(conf);
            hdfs = (DistributedFileSystem) fs;
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * List the host names of all DataNodes in the cluster.
     */
    public void listDataNodeInfo() {
        try {
            DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
            String[] names = new String[dataNodeStats.length];
            System.out.println("List of all the datanode in the HDFS cluster:");
            for (int i = 0; i < names.length; i++) {
                names[i] = dataNodeStats[i].getHostName();
                System.out.println(names[i]);
            }
            System.out.println(hdfs.getUri().toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Check whether a file exists.
     */
    public void checkFileExist() {
        try {
            Path a = hdfs.getHomeDirectory();
            System.out.println("main path:" + a.toString());

            Path f = new Path("/user/xxx/input01/");
            boolean exist = fs.exists(f);
            System.out.println("Whether exist of this file:" + exist);

            // delete the file
            // if (exist) {
            //     boolean isDeleted = hdfs.delete(f, false);
            //     if (isDeleted) {
            //         System.out.println("Delete success");
            //     }
            // }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Create a file on HDFS and write to it.
     */
    public void createFile() {
        try {
            Path f = new Path("/user/xxx/input02/file01");
            System.out.println("Create and Write :" + f.getName() + " to hdfs");
            FSDataOutputStream os = fs.create(f, true);
            // write as UTF-8 so the content is not garbled
            Writer out = new OutputStreamWriter(os, "utf-8");
            out.write("你好 good job");
            out.close();
            os.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Copy a local file to HDFS.<br>
     * Make sure the file stays UTF-8 encoded all the way: local -> HDFS.
     */
    public void copyFileToHDFS() {
        try {
            Path f = new Path("/user/xxx/input02/file01");
            // local source file; adjust this path for your own machine
            File file = new File("E:\\hadoopTest\\temporary.txt");

            FileInputStream is = new FileInputStream(file);
            InputStreamReader isr = new InputStreamReader(is, "utf-8");
            BufferedReader br = new BufferedReader(isr);

            FSDataOutputStream os = fs.create(f, true);
            Writer out = new OutputStreamWriter(os, "utf-8");

            String str = "";
            while ((str = br.readLine()) != null) {
                out.write(str + "\n");
            }
            br.close();
            isr.close();
            is.close();
            out.close();
            os.close();
            System.out.println("Write content of file " + file.getName()
                    + " to hdfs file " + f.getName() + " success");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Get the locations of the blocks that make up a file.
     */
    public void getLocation() {
        try {
            Path f = new Path("/user/xxx/input02/file01");
            FileStatus fileStatus = fs.getFileStatus(f);

            BlockLocation[] blkLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
            for (BlockLocation currentLocation : blkLocations) {
                String[] hosts = currentLocation.getHosts();
                for (String host : hosts) {
                    System.out.println(host);
                }
            }

            // get the last modification time
            long modifyTime = fileStatus.getModificationTime();
            Date d = new Date(modifyTime);
            System.out.println(d);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Read the content of a file stored on HDFS.
     */
    public void readFileFromHdfs() {
        try {
            Path f = new Path("/user/xxx/input02/file01");

            FSDataInputStream dis = fs.open(f);
            InputStreamReader isr = new InputStreamReader(dis, "utf-8");
            BufferedReader br = new BufferedReader(isr);
            String str = "";
            while ((str = br.readLine()) != null) {
                System.out.println(str);
            }
            br.close();
            isr.close();
            dis.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively list every file and directory under the given path.
     *
     * @param path the HDFS path to start from
     * @throws IOException
     * @throws IllegalArgumentException
     * @throws FileNotFoundException
     */
    public void listFileStatus(String path) throws FileNotFoundException, IllegalArgumentException, IOException {
        FileStatus[] fileStatus = fs.listStatus(new Path(path));
        int listlength = fileStatus.length;
        for (int i = 0; i < listlength; i++) {
            if (!fileStatus[i].isDirectory()) {
                System.out.println("filename:"
                        + fileStatus[i].getPath().getName() + "\tsize:"
                        + fileStatus[i].getLen());
            } else {
                String newpath = fileStatus[i].getPath().toString();
                listFileStatus(newpath);
            }
        }
    }

    public static void main(String[] args) {
        HadoopFSOperations a = new HadoopFSOperations();

        a.listDataNodeInfo();
        // a.checkFileExist();
        // a.createFile();
        // a.copyFileToHDFS();
        // a.getLocation();
        // a.readFileFromHdfs();
        try {
            a.listFileStatus(HADOOP_URL + "/user");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Because my Hadoop cluster runs on 192.168.190.129, the code uses private static final String HADOOP_URL="hdfs://192.168.190.129:9000"; change it to match your own setup. Run the program and you should see output like this:
List of all the datanode in the HDFS cluster:
hadoopslaver0
hadoopslaver2
hadoopslaver1
hdfs://192.168.190.129:9000
filename:TrustCom2015_CFP.pdf size:290401
filename:jd.PNG size:16647
You can see the three DataNodes (hadoopslaver0, hadoopslaver1 and hadoopslaver2), as well as the files that were placed under /user beforehand. The little experiment works.
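As a side note, if you would rather not hardcode the NameNode address, it can be passed in from the command line instead. A minimal sketch of that variation is below; it is my own addition, the class name HdfsConnectDemo is a placeholder, and fs.defaultFS is the standard Hadoop 2.x property behind FileSystem.setDefaultUri.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsConnectDemo {
    public static void main(String[] args) throws IOException {
        // Take the NameNode address from the first argument; fall back to the address used above.
        String url = args.length > 0 ? args[0] : "hdfs://192.168.190.129:9000";

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", url); // same effect as FileSystem.setDefaultUri(conf, url)

        FileSystem fs = FileSystem.get(conf);
        System.out.println("Connected to " + fs.getUri());
        for (FileStatus status : fs.listStatus(new Path("/user"))) {
            System.out.println(status.getPath());
        }
        fs.close();
    }
}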
3 Summary
With the steps above, the Hadoop Java API can be used on Linux. It is only the first step of a long journey, but at least it is done.