HDFS编程实践Java
Hadoop版本2(Hadoop3的代码有不同的地方,需要参考官方文档)
准备工作
-
下载安装Eclipse(安装在Windows上即可)
-
在Master的根目录上,创建myFile.txt和localFile.txt并写入任意内容
# 如果上次shell编程有做,先删除文件 $ cd ~ $ hadoop fs -rm -f /user/qinphy/myFile.txt $ rm -f myFile.txt $ rm -f localFile.txt # 创建文件 $ cd ~ $ touch myFile.txt $ vi myFile.txt This is myFile Hello HDFS! $ touch localFile.txt $ vi localFile.txt This is localFile Hello Hadoop! # input文件夹有的话,也要删除 $ hadoop fs -rm -R /user/qinphy/input
-
下载相关的jar包
本次实验只涉及到hadoop-common-2.7.1.jar包
如果需要做更大的程序,需要的所有依赖如下:
"/usr/local/hadoop/share/hadoop/common"目录下的hadoop-common-2.7.1.jar和hadoop-nfs-2.7.1.jar;
"/usr/local/hadoop/share/hadoop/common/lib"目录下的所有JAR包; "/usr/local/hadoop/share/hadoop/hdfs"目录下的hadoop-hdfs-2.7.1.jar和hadoop-hdfs-nfs-2.7.1.jar; "/usr/local/hadoop/share/hadoop/hdfs/lib"目录下的所有JAR包。
(可能有重复的jar包,覆盖即可)
$ cd /usr/local/hadoop/share/hadoop/common $ sz 选择hadoop-common-2.7.1.jar # 其他的包本次实验用不到,如果太多可以压缩zip下载下来。
在Eclipse里面编程
-
新建工程
-
新建lib文件夹把jar包放进去
导入jar包:把hadoop-common-2.7.1.jar复制粘贴进入lib
-
新建两个包shell和myStream
-
新建class:BasicMethod
package shell;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Static helpers wrapping common HDFS operations (exists / upload / download /
 * append / list / mkdir / touch / delete / move).
 *
 * <p>NOTE: {@link FileSystem#get(Configuration)} returns a JVM-wide cached
 * instance, so it is deliberately never closed here — closing it would break
 * every other method sharing the cache (the original {@code cat} did exactly
 * that, which made any subsequent call fail).
 */
public class BasicMethod {

    /**
     * Timestamp pattern for listings. The original used "YY-MM-DD HH:MM:SS",
     * which in SimpleDateFormat means week-year / month / day-in-year and
     * minutes in place of months — lower-case letters are required.
     */
    private static final String DATE_PATTERN = "yy-MM-dd HH:mm:ss";

    /**
     * Checks whether a file or directory exists.
     *
     * @param conf Hadoop configuration
     * @param path HDFS path of the file/directory
     * @return true if it exists; false if not (or on error)
     */
    public static boolean isExist(Configuration conf, String path) {
        try {
            FileSystem fs = FileSystem.get(conf);
            return fs.exists(new Path(path));
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Checks whether a directory is empty (no files anywhere beneath it).
     *
     * @param conf        Hadoop configuration
     * @param hdfsDirPath HDFS directory path
     * @return true if empty; false if it contains files (or on error)
     */
    public static boolean isEmpty(Configuration conf, String hdfsDirPath) {
        try {
            FileSystem fs = FileSystem.get(conf);
            // Recursive listing; empty iff the iterator yields nothing.
            RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(hdfsDirPath), true);
            return !it.hasNext();
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Uploads a local file, overwriting any existing HDFS file.
     *
     * @param conf          Hadoop configuration
     * @param localFilePath local source path
     * @param hdfsFilePath  HDFS destination path
     * @return true on success, false on failure
     */
    public static boolean put(Configuration conf, String localFilePath, String hdfsFilePath) {
        Path localPath = new Path(localFilePath);
        Path hdfsPath = new Path(hdfsFilePath);
        try {
            FileSystem fs = FileSystem.get(conf);
            // delSrc=false keeps the local copy; overwrite=true replaces the target.
            fs.copyFromLocalFile(false, true, localPath, hdfsPath);
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Appends the contents of a local file to an existing HDFS file.
     *
     * @param conf          Hadoop configuration
     * @param localFilePath local source path
     * @param hdfsFilePath  HDFS file to append to
     * @return true on success, false on failure
     */
    public static boolean appendFile(Configuration conf, String localFilePath, String hdfsFilePath) {
        Path hdfsPath = new Path(hdfsFilePath);
        // try-with-resources: the original leaked the FileInputStream, and
        // leaked the output stream too if a write threw mid-copy.
        try (FileInputStream in = new FileInputStream(localFilePath)) {
            FileSystem fs = FileSystem.get(conf);
            try (FSDataOutputStream out = fs.append(hdfsPath)) {
                byte[] data = new byte[1024];
                int read;
                while ((read = in.read(data)) > 0) {
                    out.write(data, 0, read);
                }
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Downloads an HDFS file, overwriting the local path if it exists.
     *
     * @param conf          Hadoop configuration
     * @param localFilePath local destination path
     * @param hdfsFilePath  HDFS source path
     * @return true on success, false on failure
     */
    public static boolean get(Configuration conf, String localFilePath, String hdfsFilePath) {
        try {
            FileSystem fs = FileSystem.get(conf);
            fs.copyToLocalFile(new Path(hdfsFilePath), new Path(localFilePath));
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Downloads an HDFS file; if the local file exists it is auto-renamed by
     * appending 1, 2, … before the extension ("a.txt" → "a1.txt").
     *
     * @param conf          Hadoop configuration
     * @param localFilePath preferred local destination path
     * @param hdfsFilePath  HDFS source path
     * @return true on success, false on failure
     */
    public static boolean download(Configuration conf, String localFilePath, String hdfsFilePath) {
        Path hdfsPath = new Path(hdfsFilePath);
        try {
            FileSystem fs = FileSystem.get(conf);
            File f = new File(localFilePath);
            if (f.exists()) {
                System.out.print("文件已存在,自动重命名: ");
                // Split name/extension at the last dot; the original assumed a
                // fixed 4-character ".txt" suffix and corrupted other names.
                int dot = localFilePath.lastIndexOf('.');
                String base = dot >= 0 ? localFilePath.substring(0, dot) : localFilePath;
                String ext = dot >= 0 ? localFilePath.substring(dot) : "";
                int n = 0;
                do {
                    n++;
                    f = new File(base + n + ext);
                } while (f.exists());
                localFilePath = base + n + ext;
                System.out.println(localFilePath);
            }
            fs.copyToLocalFile(hdfsPath, new Path(localFilePath));
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Prints the contents of an HDFS text file to stdout.
     *
     * @param conf         Hadoop configuration
     * @param hdfsFilePath HDFS file path
     */
    public static void cat(Configuration conf, String hdfsFilePath) {
        Path hdfsPath = new Path(hdfsFilePath);
        try {
            FileSystem fs = FileSystem.get(conf);
            // Close the reader (and via it the stream) — NOT the shared
            // FileSystem, which the original closed by mistake.
            try (FSDataInputStream in = fs.open(hdfsPath);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints path, permission, size and modification time of one status entry. */
    private static void printStatus(FileStatus s) {
        System.out.println("路径:" + s.getPath().toString());
        System.out.println("权限:" + s.getPermission().toString());
        System.out.println("大小:" + s.getLen());
        // SimpleDateFormat is not thread-safe, so build one per call.
        String date = new SimpleDateFormat(DATE_PATTERN).format(s.getModificationTime());
        System.out.println("时间:" + date);
    }

    /**
     * Prints status information (path/permission/size/mtime) of a file.
     *
     * @param conf         Hadoop configuration
     * @param hdfsFilePath HDFS file path
     */
    public static void lsFile(Configuration conf, String hdfsFilePath) {
        try {
            FileSystem fs = FileSystem.get(conf);
            FileStatus[] fileStatus = fs.listStatus(new Path(hdfsFilePath));
            for (FileStatus s : fileStatus) {
                printStatus(s);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively prints status information for every file under a directory.
     *
     * @param conf        Hadoop configuration
     * @param hdfsDirPath HDFS directory path
     */
    public static void lsDir(Configuration conf, String hdfsDirPath) {
        try {
            FileSystem fs = FileSystem.get(conf);
            RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(hdfsDirPath), true);
            while (it.hasNext()) {
                printStatus(it.next());
                System.out.println();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a directory (and any missing parents).
     *
     * @param conf        Hadoop configuration
     * @param hdfsDirPath HDFS directory path
     * @return true on success, false on failure
     */
    public static boolean mkdir(Configuration conf, String hdfsDirPath) {
        try {
            FileSystem fs = FileSystem.get(conf);
            return fs.mkdirs(new Path(hdfsDirPath));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Creates an empty file (like "hadoop fs -touchz").
     *
     * @param conf         Hadoop configuration
     * @param hdfsFilePath HDFS file path
     * @return true on success, false on failure
     */
    public static boolean touchz(Configuration conf, String hdfsFilePath) {
        try {
            FileSystem fs = FileSystem.get(conf);
            try (FSDataOutputStream out = fs.create(new Path(hdfsFilePath))) {
                // Nothing to write: creating and closing yields an empty file.
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Deletes a file or directory.
     *
     * @param conf         Hadoop configuration
     * @param hdfsFilePath HDFS path to delete
     * @return true on success, false on failure
     */
    public static boolean rm(Configuration conf, String hdfsFilePath) {
        try {
            FileSystem fs = FileSystem.get(conf);
            // recursive=true also removes non-empty directories;
            // false would only delete files and empty directories.
            return fs.delete(new Path(hdfsFilePath), true);
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Moves (renames) a file within HDFS.
     *
     * @param conf          Hadoop configuration
     * @param hdfsFilePath1 source HDFS path
     * @param hdfsFilePath2 destination HDFS path
     */
    public static void mv(Configuration conf, String hdfsFilePath1, String hdfsFilePath2) {
        try {
            FileSystem fs = FileSystem.get(conf);
            // rename() moves the file inside HDFS directly; the original
            // round-tripped it through a local temp file, which is slow and
            // could leave /home/qinphy/tmp.txt behind on failure.
            fs.rename(new Path(hdfsFilePath1), new Path(hdfsFilePath2));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
-
新建class:Action
package shell;

import java.io.File;
import java.util.Scanner;

import org.apache.hadoop.conf.Configuration;

/**
 * Interactive wrapper around {@link BasicMethod}: each operation validates
 * paths, asks the user how to resolve conflicts, and prints the outcome.
 *
 * <p>Scanners wrap {@code System.in} and are intentionally not closed —
 * closing one would close stdin for the rest of the program.
 */
public class Action {

    private String localFilePath;  // default local file for upload/download/append
    private String hdfsFilePath;   // default HDFS file for upload/download/append
    private Configuration conf;    // Hadoop configuration shared by all operations

    public String getLocalFilePath() {
        return localFilePath;
    }

    public void setLocalFilePath(String localFilePath) {
        this.localFilePath = localFilePath;
    }

    public String getHdfsFilePath() {
        return hdfsFilePath;
    }

    public void setHdfsFilePath(String hdfsFilePath) {
        this.hdfsFilePath = hdfsFilePath;
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    public Action(Configuration conf, String localFilePath, String hdfsFilePath) {
        super();
        this.localFilePath = localFilePath;
        this.hdfsFilePath = hdfsFilePath;
        this.conf = conf;
    }

    /**
     * Returns a local path that does not clash with an existing file by
     * inserting 1, 2, … before the extension ("a.txt" → "a1.txt").
     * The original hard-coded {@code length() - 4}, which assumed a ".txt"
     * suffix and corrupted any other file name.
     */
    private static String autoRename(String localPath) {
        int dot = localPath.lastIndexOf('.');
        String base = dot >= 0 ? localPath.substring(0, dot) : localPath;
        String ext = dot >= 0 ? localPath.substring(dot) : "";
        int n = 0;
        File f;
        do {
            n++;
            f = new File(base + n + ext);
        } while (f.exists());
        return base + n + ext;
    }

    /**
     * Uploads {@code localFilePath} to {@code hdfsFilePath}. If the target
     * already exists, asks the user whether to append or overwrite.
     */
    public void upload() {
        Scanner input = new Scanner(System.in);
        File file = new File(localFilePath);
        if (file.exists()) {
            if (BasicMethod.isExist(conf, hdfsFilePath)) {
                System.out.print("上传文件已存在:1.追加 2.覆盖:");
                String str = input.nextLine();
                if (str.equals("1")) {
                    if (BasicMethod.appendFile(conf, localFilePath, hdfsFilePath)) {
                        System.out.println("追加成功!");
                    } else {
                        System.out.println("追加失败!");
                    }
                } else {
                    if (BasicMethod.put(conf, localFilePath, hdfsFilePath)) {
                        System.out.println("覆盖成功!");
                    } else {
                        System.out.println("覆盖失败!");
                    }
                }
            } else {
                if (BasicMethod.put(conf, localFilePath, hdfsFilePath)) {
                    System.out.println("上传成功!");
                } else {
                    System.out.println("上传失败!");
                }
            }
        } else {
            System.out.println("无法上传:本地文件不存在!");
        }
    }

    /**
     * Downloads {@code hdfsFilePath} to {@code localFilePath}; if the local
     * file already exists the download is auto-renamed instead of overwriting.
     */
    public void download() {
        if (BasicMethod.isExist(conf, hdfsFilePath)) {
            File file = new File(localFilePath);
            if (file.exists()) {
                System.out.println("检测到本地文件存在,自动重命名!");
                String newLocalPath = autoRename(localFilePath);
                BasicMethod.get(conf, newLocalPath, hdfsFilePath);
                System.out.println("下载成功:" + newLocalPath);
            } else {
                BasicMethod.get(conf, localFilePath, hdfsFilePath);
                System.out.println("下载成功!");
            }
        } else {
            System.out.println("无法下载:hdfs文件不存在!");
        }
    }

    /**
     * Creates an empty HDFS file, creating the parent directory first when it
     * does not exist. Fails if the file itself already exists.
     */
    public void mkfile(String hdfsFilePath) {
        int index = hdfsFilePath.lastIndexOf('/');
        String hdfsDirPath = hdfsFilePath.substring(0, index);
        if (BasicMethod.isExist(conf, hdfsDirPath)) {
            if (BasicMethod.isExist(conf, hdfsFilePath)) {
                System.out.println("创建失败:文件已存在!");
            } else {
                if (BasicMethod.touchz(conf, hdfsFilePath)) {
                    System.out.println("创建成功!");
                } else {
                    System.out.println("创建失败!");
                }
            }
        } else {
            System.out.println("路径不存在:");
            if (BasicMethod.mkdir(conf, hdfsDirPath)) {
                System.out.println("已创建路径:" + hdfsDirPath);
            }
            if (BasicMethod.touchz(conf, hdfsFilePath)) {
                System.out.println("文件创建成功!");
            } else {
                System.out.println("创建失败!");
            }
        }
    }

    /** Deletes an HDFS file if it exists, printing the result. */
    public void delfile(String hdfsFilePath) {
        if (BasicMethod.isExist(conf, hdfsFilePath)) {
            if (BasicMethod.rm(conf, hdfsFilePath)) {
                System.out.println("删除成功!");
            } else {
                System.out.println("删除失败!");
            }
        } else {
            System.out.println(hdfsFilePath + "无法删除:文件不存在!");
        }
    }

    /** Creates an HDFS directory unless it already exists. */
    public void mkdir(String hdfsDirPath) {
        if (BasicMethod.isExist(conf, hdfsDirPath)) {
            System.out.println("无法创建:目录路径已存在!");
        } else {
            if (BasicMethod.mkdir(conf, hdfsDirPath)) {
                System.out.println("目录创建成功!");
            } else {
                System.out.println("目录创建失败!");
            }
        }
    }

    /**
     * Deletes an HDFS directory. Empty directories are removed silently;
     * non-empty ones require the user to confirm with "Y".
     */
    public void deldir(String hdfsDirPath) {
        Scanner input = new Scanner(System.in);
        if (BasicMethod.isExist(conf, hdfsDirPath)) {
            if (BasicMethod.isEmpty(conf, hdfsDirPath)) {
                BasicMethod.rm(conf, hdfsDirPath);
                System.out.println("目录已删除!");
            } else {
                System.out.print("目录不为空,是否全部删除?Y/N: ");
                String str = input.nextLine();
                if (str.equals("Y")) {
                    BasicMethod.rm(conf, hdfsDirPath);
                    System.out.println("已删除目录!");
                } else {
                    System.out.println("已取消操作!");
                }
            }
        } else {
            System.out.println("无法删除:目录不存在!");
        }
    }

    /** Moves an HDFS file, refusing to overwrite an existing destination. */
    public void mvfile(String hdfsFilePath1, String hdfsFilePath2) {
        if (BasicMethod.isExist(conf, hdfsFilePath1)) {
            if (BasicMethod.isExist(conf, hdfsFilePath2)) {
                System.out.println(hdfsFilePath2 + "文件存在!");
            } else {
                BasicMethod.mv(conf, hdfsFilePath1, hdfsFilePath2);
                System.out.println("成功:" + hdfsFilePath1 + "移动到" + hdfsFilePath2);
            }
        } else {
            System.out.println(hdfsFilePath1 + ": 路径不存在!");
        }
    }

    /**
     * Appends a local file to {@code hdfsFilePath}, letting the user choose
     * head or tail. A head-append is implemented as: download the HDFS file
     * to a temp copy, re-upload the local file, then append the temp copy.
     */
    public void append(String localFilePath) {
        Scanner input = new Scanner(System.in);
        File file = new File(localFilePath);
        if (BasicMethod.isExist(conf, hdfsFilePath) && file.exists()) {
            System.out.println("追加位置选择:1.文首 2.文末");
            String str = input.nextLine();
            if (str.equals("1")) {
                String localPath = "/home/qinphy/tmp.txt";
                if (BasicMethod.get(conf, localPath, hdfsFilePath)
                        && BasicMethod.rm(conf, hdfsFilePath)
                        && BasicMethod.put(conf, localFilePath, hdfsFilePath)
                        && BasicMethod.appendFile(conf, localPath, hdfsFilePath)) {
                    File f = new File(localPath);
                    f.delete();
                    System.out.println("文首追加成功!");
                } else {
                    System.out.println("文首追加失败!");
                }
            } else {
                if (BasicMethod.appendFile(conf, localFilePath, hdfsFilePath)) {
                    System.out.println("追加成功!");
                } else {
                    System.out.println("追加失败!");
                }
            }
        } else {
            if (!BasicMethod.isExist(conf, hdfsFilePath)) {
                System.out.println("无法追加:" + hdfsFilePath + "文件不存在!");
            } else {
                System.out.println("无法追加:" + localFilePath + "文件不存在!");
            }
        }
    }

    /** Prints status info for an HDFS file, if it exists. */
    public void lsFile(String hdfsFilePath) {
        if (BasicMethod.isExist(conf, hdfsFilePath)) {
            BasicMethod.lsFile(conf, hdfsFilePath);
        } else {
            System.out.println("指定文件不存在");
        }
    }

    /** Recursively prints status info for an HDFS directory, if it exists. */
    public void lsDir(String hdfsDirPath) {
        if (BasicMethod.isExist(conf, hdfsDirPath)) {
            BasicMethod.lsDir(conf, hdfsDirPath);
        } else {
            System.out.println("指定目录不存在!");
        }
    }

    /** Prints the contents of an HDFS file, if it exists. */
    public void show(String hdfsFilePath) {
        if (BasicMethod.isExist(conf, hdfsFilePath)) {
            BasicMethod.cat(conf, hdfsFilePath);
        } else {
            System.out.println("指定文件不存在!");
        }
    }

    /**
     * Dumps the current state of the lab's well-known HDFS and local paths.
     * The paths here intentionally shadow the fields: this is a fixed-fixture
     * diagnostic, not an operation on the configured file pair.
     */
    public void test() {
        String localFilePath = "/home/qinphy/myFile.txt";
        String localFilePath2 = "/home/qinphy/localFile.txt";
        String hdfsFilePath = "/user/qinphy/myFile.txt";
        String hdfsFilePath2 = "/user/qinphy/input/myFile.txt";
        String hdfsPath1 = "/user/qinphy";
        String hdfsDirPath = "/user/qinphy/input";
        System.out.println();
        System.out.println("----------test:");
        System.out.println("hdfs所有文件:");
        lsDir(hdfsPath1);
        if (BasicMethod.isExist(conf, hdfsDirPath)) {
            System.out.println(hdfsDirPath + "路径存在!");
        }
        if (BasicMethod.isExist(conf, hdfsFilePath)) {
            System.out.println(hdfsFilePath + "文件内容:");
            BasicMethod.cat(conf, hdfsFilePath);
        }
        if (BasicMethod.isExist(conf, hdfsFilePath2)) {
            System.out.println(hdfsFilePath2 + "文件内容:");
            BasicMethod.cat(conf, hdfsFilePath2);
        }
        System.out.println();
        System.out.println();
        System.out.println("本地文件有:");
        File localFile1 = new File(localFilePath);
        File localFile2 = new File(localFilePath2);
        if (localFile1.exists()) {
            System.out.println(localFilePath);
        }
        if (localFile2.exists()) {
            System.out.println(localFilePath2);
        }
        // Also report any auto-renamed copies myFile1.txt … myFile10.txt.
        String path = localFilePath.substring(0, localFilePath.length() - 4);
        for (int a = 0; a <= 10; a++) {
            File f = new File(path + a + ".txt");
            if (f.exists()) {
                System.out.println(path + a + ".txt");
            }
        }
        System.out.println("----------end;");
        System.out.println();
    }
}
-
新建class:Main
package shell; import org.apache.hadoop.conf.Configuration; public class Main { public static void main(String[] args) { // TODO Auto-generated method stub String localFilePath = "/home/qinphy/myFile.txt"; String localFilePath2 = "/home/qinphy/localFile.txt"; String hdfsFilePath = "/user/qinphy/myFile.txt"; String hdfsFilePath2 = "/user/qinphy/input/myFile.txt"; String hdfsPath1 = "/user/qinphy"; String hdfsPath2 = "/user/qinphy/input"; Configuration conf = new Configuration(); Action act = new Action(conf, localFilePath, hdfsFilePath); System.out.println("=============1. 上传文件" + localFilePath + ",三次不同选择。============="); System.out.println("1.1 第一次上传:"); act.upload(); // 上传成功 act.test(); System.out.println("1.2 第二次上传:"); act.upload(); // 上传追加 act.test(); System.out.println("1.3 第三次上传:"); act.upload(); // 上传覆盖 act.test(); System.out.println("=============2. 追加文件:" + localFilePath2 + "追加到" + hdfsFilePath + "============="); act.append(localFilePath2); act.test(); System.out.println("3. 下载文件。"); act.download(); act.test(); System.out.println("4. 创建" + hdfsFilePath + "文件。"); System.out.println("4.1 先删除原文件:"); act.delfile(hdfsFilePath); act.test(); System.out.println("4.2 创建文件:"); act.mkfile(hdfsFilePath); act.test(); System.out.println("5. 删除目录,多情况测试。"); System.out.println("4.1 先创建空目录:" + hdfsPath2); act.mkdir(hdfsPath2); act.test(); System.out.println("4.2 删除空目录:"); act.deldir(hdfsPath2); System.out.println("4.3 创建空目录:" + hdfsPath2); act.mkdir(hdfsPath2); act.test(); System.out.println("4.4 在input里面创建文件"); act.mkfile(hdfsFilePath2); act.test(); System.out.println("4.5 删除非空目录:"); act.deldir(hdfsPath2); act.test(); System.out.println("5. 移动文件:"); System.out.println("创建空目录:" + hdfsPath2); act.mkdir(hdfsPath2); act.test(); System.out.println("移动文件,把" + hdfsFilePath + "移动到" + hdfsFilePath2); act.mvfile(hdfsFilePath, hdfsFilePath2); act.test(); } }
-
在myStream里面新建class:MyFSDataInputStream
package myStream; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; public class MyFSDataInputStream extends FSDataInputStream { public MyFSDataInputStream(InputStream in) { super(in); } public static String readLine(Configuration conf, String hdfsFilePath) { try { FileSystem fs = FileSystem.get(conf); Path hdfsPath = new Path(hdfsFilePath); FSDataInputStream in = fs.open(hdfsPath); BufferedReader b = new BufferedReader(new InputStreamReader(in)); String line = null; if((line = b.readLine()) != null) { b.close(); in.close(); return line; } return null; } catch(IOException e) { e.printStackTrace(); return null; } } public static void main(String[] args) { // TODO Auto-generated method stub Configuration conf = new Configuration(); String hdfsFilePath = "/user/qinphy/myFile.txt"; System.out.println("读取文件:"); System.out.println(MyFSDataInputStream.readLine(conf, hdfsFilePath)); System.out.println("\n读取完毕!"); } }
-
编译,导出jar包
点击Main,运行。
点击MyFSDataInputStream,运行。
忽略错误,达到编译的目的即可。
工程结构如下:
导出jar包:(编译之后,才能导出)
找到:Main和MyFSDataInputStream导出。
选择导出的文件夹,在Windows的下载里面。
-
连接到Master
$ cd ~/Downloads $ rz 上传Main.jar和MyFSDataInputStream.jar $ cd ~ $ hadoop jar ~/Downloads/Main.jar ... 1 ... 2 ... Y ... $ hadoop jar ~/Downloads/MyFSDataInputStream.jar ...