import java.net.URI;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class FindTxts {

    /**
     * Entry point: recursively finds every ".txt" file under /home/data
     * and prints each path on its own line.
     */
    public static void main(String[] args) throws Exception {
        ArrayList<String> pathList = findTxts("/home/data");
        for (String path : pathList) {
            System.out.println(path);
        }
    }

    /**
     * Recursively collects the fully-qualified path strings of all files
     * ending in ".txt" under {@code parentDir}.
     *
     * @param parentDir directory path/URI to scan (HDFS or local, per the URI scheme)
     * @return list of path strings for every .txt file found, depth-first
     * @throws Exception if the FileSystem cannot be obtained or a listing fails
     */
    private static ArrayList<String> findTxts(String parentDir) throws Exception {
        ArrayList<String> pathList = new ArrayList<String>();
        // Delegate to the accumulating overload instead of duplicating the
        // whole traversal body (the original copy-pasted it).
        findTxts(parentDir, pathList);
        return pathList;
    }

    /**
     * Recursive worker: appends the path of every ".txt" file under
     * {@code parentDir} to {@code pathList}, descending into subdirectories.
     *
     * @param parentDir directory path/URI to scan
     * @param pathList  accumulator that receives matching .txt paths
     * @throws Exception if the FileSystem cannot be obtained or a listing fails
     */
    private static void findTxts(String parentDir, ArrayList<String> pathList) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(parentDir), conf);
        FileStatus[] statuses = fs.listStatus(new Path(parentDir));
        for (FileStatus status : statuses) {
            String pathString = status.getPath().toString();
            if (status.isFile() && pathString.endsWith(".txt")) {
                // Regular file matching parentDir/*.txt — record it.
                pathList.add(pathString);
            } else if (status.isDirectory()) {
                // Subdirectory — recurse into it with the shared accumulator.
                findTxts(pathString, pathList);
            }
        }
    }
}
hadoop 遍历hdfs文件夹列出其中所有的txt文件
最新推荐文章于 2022-03-24 19:49:02 发布