import java.io.File;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

/** Created by Administrator on 12/10/2017. */
public classConvertHdfsZipFileToGzipFile {public static boolean isRecur = false;public static void main(String[] args) throwsIOException {if (args.length == 0)
errorMessage("1filesmerge [-r|-R] ");if (args[0].matches("^-[rR]$")) {
isRecur= true;
}if ((isRecur && args.length != 4) || ( !isRecur && args.length != 3)) {
errorMessage("2filesmerge [-r|-R] ");
}
Configuration conf= newConfiguration();
FileSystem hdfs=FileSystem.get(conf);
Path inputDir;
Path hdfsFile;
Text pcgroupText;//hadoop jar myjar.jar ConvertHdfsZipFileToGzipFile -r /zip/(待转换文件路径,在HDFS上) /user/j/pconline/(转换完成后的文件存储地址,也在HDFS上) pconline(待转换的文件名包含的字符)
if(isRecur){
inputDir= new Path(args[1]);
hdfsFile= new Path(args[2]);
pcgroupText= new Text(args[3]);
}//hadoop jar myjar.jar ConvertHdfsZipFileToGzipFile /zip/(待转换文件路径,在HDFS上) /user/j/pconline/(转换完成后的文件存储地址,也在HDFS上) pconline(待转换的文件名包含的字符)
else{
inputDir= new Path(args[0]);
hdfsFile= new Path(args[1]);
pcgroupText= new Text(args[2]);
}if (!hdfs.exists(inputDir)) {
errorMessage("3hdfsTargetDir not exist!");
}if(hdfs.exists(hdfsFile)) {
errorMessage("4hdfsFileName exist!");
}
merge(inputDir, hdfsFile, hdfs, pcgroupText);
System.exit(0);
}/***@author*@paraminputDir zip文件的存储地址
*@paramhdfsFile 解压结果的存储地址
*@paramhdfs 分布式文件系统数据流
*@parampcgroupText 需要解压缩的文件关键名*/
public static voidmerge(Path inputDir, Path hdfsFile,
FileSystem hdfs, Text pcgroupText) {try{//文件系统地址inputDir下的FileStatus
FileStatus[] inputFiles =hdfs.listStatus(inputDir);for (int i = 0; i < inputFiles.length; i++) {if (!hdfs.isFile(inputFiles[i].getPath())) {if(isRecur){
merge(inputFiles[i].getPath(), hdfsFile, hdfs,pcgroupText);return;
}else{
System.out.println(inputFiles[i].getPath().getName()+ "is not file and not allow recursion, skip!");continue;
}
}//判断文件名是否在需要解压缩的关键名内
if(inputFiles[i].getPath().getName().contains(pcgroupText.toString()) == true){//输出待解压的文件名
System.out.println(inputFiles[i].getPath().getName());//将数据流指向待解压文件
FSDataInputStream in =hdfs.open(inputFiles[i].getPath());/***数据的解压执行过程*/ZipInputStream zipInputStream= null;try{
zipInputStream= newZipInputStream(in);
ZipEntry entry;//解压后有多个文件一并解压出来并实现合并//合并后的地址
FSDataOutputStream mergerout = hdfs.create(new Path(hdfsFile + File.separator +inputFiles[i].getPath().getName().substring(0, inputFiles[i].getPath().getName().indexOf("."))));while((entry = zipInputStream.getNextEntry()) != null){int bygeSize1=2*1024*1024;byte[] buffer1 = new byte[bygeSize1];intnNumber;while((nNumber = zipInputStream.read(buffer1,0, bygeSize1)) != -1){
mergerout.write(buffer1,0, nNumber);
}
}
mergerout.flush();
mergerout.close();
zipInputStream.close();
}catch(IOException e){continue;
}
in.close();/***将解压合并后的数据压缩成gzip格式*/GZIPOutputStream gzipOutputStream= null;try{
FSDataOutputStream outputStream= null;
outputStream= hdfs.create(new Path(hdfsFile + File.separator +inputFiles[i].getPath().getName().substring(0, inputFiles[i].getPath().getName().indexOf(".")) + ".gz"));
FSDataInputStream inputStream= null;
gzipOutputStream= newGZIPOutputStream(outputStream);
inputStream= hdfs.open(new Path(hdfsFile + File.separator + inputFiles[i].getPath().getName().substring(0, inputFiles[i].getPath().getName().indexOf("."))));int bygeSize=2*1024*1024;byte[] buffer = new byte[bygeSize];intlen;while((len = inputStream.read(buffer)) > 0){
gzipOutputStream.write(buffer,0, len);
}
inputStream.close();
gzipOutputStream.finish();
gzipOutputStream.flush();
outputStream.close();
}catch(Exception exception){
exception.printStackTrace();
}
gzipOutputStream.close();//删除zip文件解压合并后的临时文件
String tempfiles = hdfsFile + File.separator + inputFiles[i].getPath().getName().substring(0, inputFiles[i].getPath().getName().indexOf("."));try{if(hdfs.exists(newPath(tempfiles))){
hdfs.delete(new Path(tempfiles), true);
}
}catch(IOException ie){
ie.printStackTrace();
}
}
}
}catch(IOException e) {
e.printStackTrace();
}
}public static voiderrorMessage(String str) {
System.out.println("Error Message: " +str);
System.exit(1);
}
}