public classReadHDFS {public static voidmain(String[]args){long startLong =System.currentTimeMillis();
HDFSReadLog.writeLog("start read file");
String path;if (args.length > 1) {//path = args[0];
Constant.init(args[0],args[1]);
}HDFSReadLog.writeLog(Constant.PATH);try{
getFile(Constant.URI+Constant.PATH);
}catch(IOException e) {
e.printStackTrace();
}long endLong =System.currentTimeMillis();
HDFSReadLog.writeLog("cost " + (endLong -startLong)/1000 + " seconds");
HDFSReadLog.writeLog("cost " + (endLong -startLong)/1000/60 + " minute");
}private static void getFile(String filePath) throwsIOException {
FileSystem fs=FileSystem.get(URI.create(filePath), HDFSConf.getConf());
Path path= newPath(filePath);if (fs.exists(path) &&fs.isDirectory(path)) {
FileStatus[] stats=fs.listStatus(path);
FSDataInputStream is;
FileStatus stat;byte[] buffer;intindex;
StringBuilder lastStr= newStringBuilder();for(FileStatus file : stats){try{
HDFSReadLog.writeLog("start read : " +file.getPath());
is=fs.open(file.getPath());
stat=fs.getFileStatus(path);int sum =is.available();if(sum == 0){
HDFSReadLog.writeLog("have no data : " +file.getPath() );continue;
}
HDFSReadLog.writeLog("there have : " + sum + " bytes");
buffer= new byte[sum];
// 注意一点,如果文件太大了,可能会内存不够用。在本机测得时候,读一个100多M的文件,导致内存不够。
is.readFully(0,buffer);
String result=Bytes.toString(buffer);
// 写到 hbaseWriteHBase.writeHbase(result);is.close();
HDFSReadLog.writeLog("read : " + file.getPath() + " end");
}catch(IOException e){
e.printStackTrace();
HDFSReadLog.writeLog("read " + file.getPath() +" error");
HDFSReadLog.writeLog(e.getMessage());
}
}
HDFSReadLog.writeLog("Read End");
fs.close();
}else{
HDFSReadLog.writeLog(path+ " is not exists");
}
}
}