首先需要安装配置HDFS和eclipse,此步骤可通过前面的博客查阅。运行此过程时可以考虑在eclipse中加入Hadoop插件对HDFS文件进行管理,此过程暂不做描述,可自行查找相关教程文档。
1. 通过终端开启Hadoop
$ start-all.sh
$ jps
2. 打开eclipse,创建project,创建包和class
3. 导入jar包,Build Path → Configure Build Path... ,导入Hadoop安装目录下以及其lib文件夹下的所有jar包
4. 编写代码实现相应功能
- package com.jdk.hdfs;
-
- import java.io.IOException;
- import java.net.URI;
-
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.FileStatus;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.FileUtil;
- import org.apache.hadoop.fs.Path;
-
- public class ls {
-
- /**
- * @param args
- * @throws IOException
- */
- public static void main(String[] args) throws IOException {
- // 查看文件夹
- // String dir = "/";
- // Configuration conf = new Configuration();
- conf.set("fs.defaultfs", "hdfs://196.168.152.141:9000/"); //如果是在本地运行,则可以直接使用conf,不需要加URI链接,如果运行的是集群,则需要添加URI链接,下同
- // FileSystem fs = FileSystem.get(conf);
- // FileStatus[] filestatus = fs.listStatus(new Path(dir));
- // Path[] list = FileUtil.stat2Paths(filestatus);
- // for (Path path : list){
- // System.out.println(path.toString());
- // }
- // fs.close();
-
- // 新建文件夹
- // Configuration conf = new Configuration();
- // FileSystem fs = FileSystem.get(URI.create("hdfs://196.168.152.141:9000/"),conf);
- // fs.mkdirs(new Path("/hi")); // 文件“hi”即为创建的文件夹
- // fs.close();
-
- // 删除文件夹
- // Configuration conf = new Configuration();
- // FileSystem fs = FileSystem.get(URI.create("hdfs://196.168.152.141:9000/"),conf);
- // fs.delete(new Path("/hi"),true); // 文件“hi”即为删除的文件夹
- // fs.close();
-
- // 导入文件
- // Configuration conf = new Configuration();
- // FileSystem fs = FileSystem.get(URI.create("hdfs://196.168.152.141:9000/"),conf);
- // Path src = new Path("需要导入的文件的路径");
- // Path dst = new Path("导入到HDFS的目标文件夹,如刚才创建的“/hi”文件");
- // fs.close();
-
- }
- }
5. 新建class,编写代码
- package com.jdk.hdfs;
-
- import java.io.BufferedReader;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
-
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.FSDataInputStream;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
-
- public class ReadHDFS {
-
- /**
- * @param args
- * @throws IOException
- */
- public static void main(String[] args) throws IOException {
- // 读取HDFS中的文件
- String dir = "HDFS中需要读取的目标文件及其目录";
- Configuration conf = new Configuration();
- FileSystem fs = FileSystem.get(conf);
- FSDataInputStream file = fs.open(new Path(dir));
- BufferedReader in = null;
- String line;
- in = new BufferedReader(new InputStreamReader(file,"UTF-8"));
- while((line = in.readLine()) != null){
- System.out.print(line);
- }
- if(in != null){
- in.close();
- }
- fs.close();
-
- }
-
- }