1. Check whether a file exists, and create it if it does not
//Check whether a file exists, and create it if it does not
package test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class zzp {
    public static void main(String[] args) {
        try {
            String filename = "2107381227朱子佩";
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:9000");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(new Path(filename))) {
                System.out.println("File exists");
            } else {
                fs.createNewFile(new Path(filename)); // create an empty file
                System.out.println("File created successfully");
            }
            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
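A related check that often comes up once the path exists: telling a file apart from a directory. A minimal sketch using the same conf as above; the class name CheckType is hypothetical:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class CheckType {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("2107381227朱子佩");
        if (fs.exists(p)) {
            FileStatus status = fs.getFileStatus(p); // metadata for the path
            System.out.println(status.isDirectory() ? "Is a directory" : "Is a regular file");
        }
        fs.close();
    }
}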
2. Write to a file
//Write to a file
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class Chapter3 {
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:9000");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            FileSystem fs = FileSystem.get(conf);
            byte[] buff = "Hello world".getBytes(); // content to write
            String filename = "filename"; // name of the file to write to
            FSDataOutputStream os = fs.create(new Path(filename));
            os.write(buff, 0, buff.length);
            System.out.println("Create:" + filename);
            os.close();
            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
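Note that fs.create(new Path(filename)) silently overwrites the file if it already exists. When overwriting would be a mistake, FileSystem.create also takes an explicit overwrite flag; a minimal sketch, assuming the same fs and filename as above:
FSDataOutputStream os = fs.create(new Path(filename), false); // throws IOException if the file already exists, instead of overwriting it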
3. Read a file: a single line, or the whole file
//Read a file: a single line, or the whole file
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class Chapter3 {
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:9000");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            FileSystem fs = FileSystem.get(conf);
            Path file = new Path("test");
            FSDataInputStream getIt = fs.open(file);
            BufferedReader d = new BufferedReader(new InputStreamReader(getIt));
            //String content = d.readLine(); // read a single line
            //System.out.println(content);
            String nextline;
            while ((nextline = d.readLine()) != null) { // read the whole file
                System.out.println(nextline);
            }
            d.close(); // close the file
            fs.close(); // close HDFS
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
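readLine() treats the file as text. For binary data, or simply to avoid charset concerns, the byte-stream idiom with org.apache.hadoop.io.IOUtils works on any file. A minimal sketch, assuming the same conf as above; the class name CatFile is hypothetical:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
public class CatFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(new Path("test"));
        try {
            IOUtils.copyBytes(in, System.out, 4096, false); // copy raw bytes to stdout
        } finally {
            IOUtils.closeStream(in);
            fs.close();
        }
    }
}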
4. Append to a file and read it back (combines 1, 2 and 3)
//Append to a file and read it back
package test;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class pz {
    public static void main(String[] args) {
        try {
            String filename = "2107381227朱子佩";
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:9000");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(new Path(filename))) {
                System.out.println("File exists");
                // append to the file
                FSDataOutputStream outputStream = fs.append(new Path(filename));
                byte[] buff = "Personal career plan: civil-service exam, internship, training".getBytes("UTF-8"); // content to append
                outputStream.write(buff, 0, buff.length);
                outputStream.close();
                // read the file contents back
                FSDataInputStream getIt = fs.open(new Path(filename));
                BufferedReader d = new BufferedReader(new InputStreamReader(getIt, "UTF-8")); // match the UTF-8 encoding used when writing
                String line;
                while ((line = d.readLine()) != null) {
                    System.out.println(line);
                }
                d.close();
            } else {
                System.out.println("File does not exist");
            }
            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
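On a pseudo-distributed cluster with a single DataNode, fs.append() can fail during pipeline recovery because HDFS tries to replace a DataNode it does not have. A commonly used client-side workaround is to relax the replacement policy before calling FileSystem.get(conf); whether it is needed depends on your setup:
conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");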
5. Filter out .abc files and merge the rest
Compile the program, export it as HDFSExample.jar into the myapp directory, then run it and inspect the merged output:
cd /usr/local/hadoop
mkdir myapp
./bin/hadoop jar ./myapp/HDFSExample.jar
./bin/hdfs dfs -ls /user/hadoop
./bin/hdfs dfs -cat /user/hadoop/merge.txt
//Filter out .abc files
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
/**
 * Filter out files whose names match a given pattern
 */
class MyPathFilter implements PathFilter {
    String reg = null;
    MyPathFilter(String reg) {
        this.reg = reg;
    }
    public boolean accept(Path path) {
        return !path.toString().matches(reg); // keep only paths that do NOT match the pattern
    }
}
/***
 * Merge files in HDFS using FSDataOutputStream and FSDataInputStream
 */
public class MergeFile {
    Path inputPath = null; // directory containing the files to merge
    Path outputPath = null; // path of the output file
    public MergeFile(String input, String output) {
        this.inputPath = new Path(input);
        this.outputPath = new Path(output);
    }
    public void doMerge() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fsSource = FileSystem.get(URI.create(inputPath.toString()), conf);
        FileSystem fsDst = FileSystem.get(URI.create(outputPath.toString()), conf);
        // filter out files in the input directory whose names end in .abc
        FileStatus[] sourceStatus = fsSource.listStatus(inputPath,
                new MyPathFilter(".*\\.abc"));
        FSDataOutputStream fsdos = fsDst.create(outputPath);
        PrintStream ps = new PrintStream(System.out);
        // read each file that survived the filter and write its contents into the single output file
        for (FileStatus sta : sourceStatus) {
            // print the path, size, and permissions of each file whose suffix is not .abc
            System.out.print("Path: " + sta.getPath() + " Size: " + sta.getLen()
                    + " Permissions: " + sta.getPermission() + " Contents: ");
            FSDataInputStream fsdis = fsSource.open(sta.getPath());
            byte[] data = new byte[1024];
            int read = -1;
            while ((read = fsdis.read(data)) > 0) {
                ps.write(data, 0, read);
                fsdos.write(data, 0, read);
            }
            fsdis.close();
        }
        ps.close();
        fsdos.close();
    }
    public static void main(String[] args) throws IOException {
        MergeFile merge = new MergeFile(
                "hdfs://localhost:9000/user/hadoop/",
                "hdfs://localhost:9000/user/hadoop/merge.txt");
        merge.doMerge();
    }
}
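Since PathFilter declares a single accept method, the MyPathFilter helper class can be replaced by a lambda on Java 8 or later; a sketch with identical behavior:
FileStatus[] sourceStatus = fsSource.listStatus(inputPath,
        path -> !path.toString().matches(".*\\.abc")); // same filter, no helper class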