HDFS Java API 相关的操作方法

最新推荐文章于 2023-04-17 19:06:04 发布

weixin_37886463

最新推荐文章于 2023-04-17 19:06:04 发布

阅读量246

点赞数

分类专栏： hadoop知识点

本文链接：https://blog.csdn.net/weixin_37886463/article/details/79464633

版权

6 篇文章 0 订阅

订阅专栏

 
 import java.io.File;
  import java.io.FileInputStream;
  import java.util.regex.Pattern;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FSDataInputStream;
  import org.apache.hadoop.fs.FSDataOutputStream;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.fs.PathFilter;
  import org.apache.hadoop.io.IOUtils;

  /**
   * Basic HDFS operations built on the Hadoop {@link FileSystem} API: printing a file to
   * standard output, uploading a local file, and listing paths selected by a glob pattern
   * (optionally narrowed by a {@link MyPathFilter}).
   */
  public class HdfsApp {

      /** Buffer size, in bytes, handed to {@code IOUtils.copyBytes}. */
      private static final int BUFFER_SIZE = 4096;

      /** Regex used by {@link #getFileByPathFilter(String)} when the caller supplies none. */
      private static final String DEFAULT_FILTER_REGEX = ".*t";

      /**
       * Obtains the file system for a default-constructed {@link Configuration}.
       *
       * <p>No property is set explicitly here, so the file system returned is whatever the
       * default configuration resolves to.
       *
       * @return the file system returned by {@code FileSystem.get}
       * @throws Exception if the file system cannot be obtained
       */
      public static FileSystem getFileSystem() throws Exception {
          Configuration conf = new Configuration();
          // To target a specific NameNode without relying on the default configuration:
          // conf.set("fs.defaultFS", "hdfs://myhadoop:9000");
          return FileSystem.get(conf);
      }

      /**
       * Copies the content of a file to {@code System.out}.
       *
       * @param fileName path of the file to read
       * @throws Exception if the file cannot be opened or the copy fails
       */
      public static void readFile(String fileName) throws Exception {
          FileSystem fileSystem = getFileSystem();
          Path path = new Path(fileName);
          try (FSDataInputStream in = fileSystem.open(path)) {
              // close=false: System.out must stay usable afterwards; the input stream is
              // closed by try-with-resources instead.
              IOUtils.copyBytes(in, System.out, BUFFER_SIZE, false);
          }
      }

      /**
       * Copies a local file to a path on the file system and prints "write completed" once the
       * copy has succeeded.
       *
       * @param sourceFile path of the local file to upload
       * @param targetFile destination path to create
       * @throws Exception if the source cannot be opened, the target cannot be created, or
       *     copying or closing either stream fails
       */
      public static void writeFile(String sourceFile, String targetFile) throws Exception {
          FileSystem fileSystem = getFileSystem();
          Path path = new Path(targetFile);
          // The local source is opened first so that a missing source file fails before the
          // target is created. Resources close in reverse order, so the output is closed
          // (and any close-time failure surfaces) before the input.
          try (FileInputStream in = new FileInputStream(new File(sourceFile));
                  FSDataOutputStream out = fileSystem.create(path)) {
              IOUtils.copyBytes(in, out, BUFFER_SIZE, false);
          }
          System.out.println("write completed");
      }

      /**
       * Prints the path and replication factor of every entry matching a glob pattern.
       *
       * @param dispFileName path pattern, which may contain glob wildcards
       * @throws Exception if the file system cannot be queried
       */
      public static void getDispInfo(String dispFileName) throws Exception {
          FileSystem fileSystem = getFileSystem();
          printMatches(fileSystem, fileSystem.globStatus(new Path(dispFileName)));
      }

      /**
       * Same as {@link #getFileByPathFilter(String, String)} with the regex {@code ".*t"}.
       *
       * @param filePath path pattern, which may contain glob wildcards
       * @throws Exception if the file system cannot be queried
       */
      public static void getFileByPathFilter(String filePath) throws Exception {
          getFileByPathFilter(filePath, DEFAULT_FILTER_REGEX);
      }

      /**
       * Prints the path and replication factor of every entry that matches a glob pattern and
       * is also accepted by a {@link MyPathFilter} built from {@code regex}. Only paths the
       * filter accepts are kept.
       *
       * @param filePath path pattern, which may contain glob wildcards
       * @param regex regular expression handed to {@link MyPathFilter}
       * @throws Exception if the file system cannot be queried
       */
      public static void getFileByPathFilter(String filePath, String regex) throws Exception {
          FileSystem fileSystem = getFileSystem();
          FileStatus[] matches = fileSystem.globStatus(new Path(filePath), new MyPathFilter(regex));
          printMatches(fileSystem, matches);
      }

      /**
       * Prints one report per matched entry; shared by the glob-based listing methods.
       *
       * @param fileSystem file system used to re-check that each path exists
       * @param matches result of {@code globStatus}; may be {@code null}
       * @throws Exception if an existence check fails
       */
      private static void printMatches(FileSystem fileSystem, FileStatus[] matches)
              throws Exception {
          // globStatus is documented to return null (not an empty array) when the pattern
          // has no wildcard and the path does not exist; iterating it would throw.
          if (matches == null) {
              System.out.println("不存在");
              return;
          }
          for (FileStatus status : matches) {
              if (fileSystem.exists(status.getPath())) {
                  System.out.println("存在");
                  System.out.println("文件路径: " + status.getPath());
                  System.out.println("文件备份数： " + status.getReplication());
              } else {
                  System.out.println("不存在");
              }
          }
      }

      /**
       * Demo entry point: lists the entries under the sample input directory that pass the
       * default path filter.
       *
       * @param args unused
       * @throws Exception if the file system cannot be queried
       */
      public static void main(String[] args) throws Exception {
          // Other usages, kept for reference:
          //   writeFile("E:\\大数据包\\data_test\\sourceFile.txt",
          //           "hdfs://myhadoop:9000/targetFile.txt");
          //   readFile("/wordcount/input/wc.txt");
          //   getDispInfo("hdfs://myhadoop:9000/wordcount/outspark/part*");
          String myPathFilterStr = "hdfs://myhadoop:9000/wordcount/input/*";
          getFileByPathFilter(myPathFilterStr);
      }
  }

 
 /*自定义的PathFilter类*/ 

  public class MyPathFilter implements PathFilter{ 

  private final String regex; 

  public MyPathFilter(String regex){ 

  this.regex = regex; 

}

  public boolean accept(Path path) { 

  // TODO Auto-generated method stub 

  //留下与正则表达式相匹配的文件 

  return path.toString().matches(regex); 

}

}

关注