package com.appcluster.datarefix;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
public class DataRegixlv2 {
    private static Configuration conf = new Configuration();
    private static FileSystem fs;
    private static DistributedFileSystem hdfs;
    private static String date = getCurrentDate();

    static {
        try {
            // Load the cluster settings into the shared conf so that every
            // FileSystem obtained from it later sees the same configuration.
            conf.addResource(new Path("/AppCluster2/config/core-site.xml"));
            conf.addResource(new Path("/AppCluster2/config/hdfs-site.xml"));
            fs = FileSystem.get(conf);
            hdfs = (DistributedFileSystem) fs;
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
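    // Note: the DistributedFileSystem cast above assumes fs.defaultFS points
    // at an HDFS cluster; against the default local file system the cast
    // would throw a ClassCastException.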
    // Check whether the file exists; if it does, delete it so a fresh run
    // can recreate it.
    public static void checkFile(String fileName) throws IOException {
        Path f = new Path(fileName);
        boolean isExists = hdfs.exists(f);
        if (isExists) {
            // The second argument makes the delete recursive for directories.
            boolean isDel = hdfs.delete(f, true);
            System.out.println(fileName + " deleted? \t" + isDel);
        } else {
            System.out.println(fileName + " exists? \t" + isExists);
        }
    }
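    // Aside: FileSystem#delete already returns false when the path does not
    // exist, so the exists() pre-check is needed only for the printout.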
    // Walk two directory levels under the given root, read every file line by
    // line, and copy the lines containing "lv2" into a single output file.
    public static void readHDFSListAll(String filename) throws Exception {
        // Create the output file once, before the loops, so lines from every
        // input file land in the same stream; overwrite is false because
        // checkFile() has already removed any previous output.
        Path pathlv2 = new Path("hdfs://127.0.0.1:9000/user/cuijh/test/ptdate=" + date + "/lv2");
        FSDataOutputStream fdoslv2 = fs.create(pathlv2, false);
        // Root directory of the log files.
        Path listf = new Path(filename);
        // First-level subdirectories under the root.
        FileStatus[] stats = hdfs.listStatus(listf);
        for (int i = 0; i < stats.length; i++) {
            // Files inside each subdirectory.
            FileStatus[] temp = hdfs.listStatus(stats[i].getPath());
            for (int k = 0; k < temp.length; k++) {
                System.out.println("file path: " + temp[k].getPath().toString());
                Path p = temp[k].getPath();
                // Wrap the HDFS input stream in a BufferedReader for
                // line-by-line reading; try-with-resources closes both even
                // if an exception is thrown mid-file.
                try (InputStream in = hdfs.open(p);
                     BufferedReader buff = new BufferedReader(new InputStreamReader(in))) {
                    String str;
                    while ((str = buff.readLine()) != null) {
                        if (str.trim().contains("lv2")) {
                            // Write the line as plain UTF-8 bytes; writeUTF()
                            // would prepend a 2-byte length header and corrupt
                            // the text output.
                            fdoslv2.write((str.trim() + "\n").getBytes(StandardCharsets.UTF_8));
                            fdoslv2.flush();
                            System.out.println(" -- " + str);
                        }
                    }
                }
            }
        }
        fdoslv2.close();
    }
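    // For an unfiltered copy, the usual Hadoop idiom is
    // org.apache.hadoop.io.IOUtils.copyBytes(in, out, conf); the manual
    // readLine() loop above is needed only because lines are filtered.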
    // Current date in yyyy-MM-dd form, used as the ptdate partition value.
    public static String getCurrentDate() {
        Date dt = new Date();
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
        return fmt.format(dt);
    }
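    // Note: SimpleDateFormat is not thread-safe; on Java 8+ the same value is
    // available as java.time.LocalDate.now().toString(), which is already
    // formatted as yyyy-MM-dd.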
    public static void main(String[] args) throws Exception {
        // Clear any output left by a previous run, then filter today's data.
        checkFile("hdfs://127.0.0.1:9000/user/cuijh/test/ptdate=" + date + "/lv2");
        checkFile("hdfs://127.0.0.1:9000/user/cuijh/test/ptdate=" + date + "/lv3");
        readHDFSListAll("hdfs://127.0.0.1:9000/user/cuijh/data/ptdate=" + date);
    }
}
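For reference, here is a stripped-down sketch of the same open/create/copy pattern in isolation, assuming fs.defaultFS points at the cluster; the class name HdfsCopySketch and the /tmp paths are illustrative placeholders, not part of the program above.

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HdfsCopySketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000"); // placeholder address
        try (FileSystem fs = FileSystem.get(conf);
             InputStream in = fs.open(new Path("/tmp/in.txt"));                    // placeholder input
             FSDataOutputStream out = fs.create(new Path("/tmp/out.txt"), true)) { // placeholder output
            // copyBytes handles buffering; 4096 is the buffer size, and the
            // final false leaves stream closing to try-with-resources.
            IOUtils.copyBytes(in, out, 4096, false);
        }
    }
}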