数据流的压缩和解压缩
CompressionCodec有两个方法可以用于轻松地压缩或解压缩数据。要想对正在被写入一个输出流的数据进行压缩,我们可以使用createOutputStream(OutputStream out)方法创建一个CompressionOutputStream,写入其中的未压缩数据会以压缩格式写入底层的流。相反,要想对从输入流读取而来的数据进行解压缩,则调用createInputStream(InputStream in)方法获得一个CompressionInputStream,进而从底层的流中读取解压缩后的数据。
测试一下如下压缩方式:
DEFLATE | org.apache.hadoop.io.compress.DefaultCodec |
gzip | org.apache.hadoop.io.compress.GzipCodec |
bzip2 | org.apache.hadoop.io.compress.BZip2Codec |
代码实现
package com.lzz.mapreduce.compress;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.avro.file.Codec;
import org.apache.curator.framework.api.Decompressible;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.mapreduce.lib.input.CompressedSplitLineReader;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * Small driver that compresses and decompresses local files through Hadoop's
 * {@link CompressionCodec} stream API.
 *
 * <p>{@link #compress(String, String)} instantiates a codec by class name and writes
 * {@code <file><codec default extension>}; {@link #decompress(String)} asks
 * {@link CompressionCodecFactory} for a codec matching the file name and writes
 * {@code <file>.decoded}.
 */
public class TestCompress {

    /** Buffer size, in bytes, passed to {@code IOUtils.copyBytes} (5 MiB). */
    private static final int BUFFER_SIZE = 5 * 1024 * 1024;

    /**
     * Entry point. Every call is commented out; uncomment one line to try a codec.
     *
     * @param args unused
     * @throws ClassNotFoundException if a codec class name passed to {@code compress} cannot be loaded
     * @throws IOException if reading or writing a file fails
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException {
        // 1. Compression examples
        // compress("g:/pd.txt", "org.apache.hadoop.io.compress.BZip2Codec");
        // compress("g:/pd.txt", "org.apache.hadoop.io.compress.GzipCodec");
        // compress("g:/pd.txt", "org.apache.hadoop.io.compress.DefaultCodec");
        // 2. Decompression examples (the codec is looked up from the file name)
        // decompress("g:/pd.txt.bz2");
        // decompress("g:/pd.txt.gz");
        // decompress("g:/pd.txt.deflate");
    }

    /**
     * Decompresses {@code filename} into {@code filename + ".decoded"}.
     *
     * <p>The codec is obtained from {@link CompressionCodecFactory#getCodec(Path)}. When no
     * codec is returned, a message is printed to standard output and nothing is written.
     *
     * @param filename path of the compressed input file
     * @throws FileNotFoundException if the input cannot be opened or the output cannot be created
     * @throws IOException if decompression or copying fails
     */
    private static void decompress(String filename) throws FileNotFoundException, IOException {
        // 0. Validate: bail out early when the factory has no codec for this file name.
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = factory.getCodec(new Path(filename));
        if (codec == null) {
            System.out.println("不支持该解码" + filename);
            return;
        }

        // 1-2. Open the streams in try-with-resources so that every stream opened so far
        // is closed even if a later open or the copy itself throws.
        try (FileInputStream rawIn = new FileInputStream(new File(filename));
                CompressionInputStream decodedIn = codec.createInputStream(rawIn);
                FileOutputStream out = new FileOutputStream(new File(filename + ".decoded"))) {
            // 3. Copy; close=false because try-with-resources owns the streams.
            IOUtils.copyBytes(decodedIn, out, BUFFER_SIZE, false);
        }
    }

    /**
     * Compresses {@code filename} with the codec class named by {@code method}, writing the
     * result next to the input with the codec's default extension appended.
     *
     * <p>The codec class is resolved before any file is opened, so an invalid class name
     * fails without touching the file system.
     *
     * @param filename path of the file to compress
     * @param method fully qualified class name of a {@link CompressionCodec} implementation
     * @throws ClassNotFoundException if {@code method} cannot be loaded
     * @throws ClassCastException if {@code method} does not name a {@link CompressionCodec}
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    private static void compress(String filename, String method) throws ClassNotFoundException, IOException {
        // Typed lookup instead of a raw Class plus an unchecked cast.
        Class<? extends CompressionCodec> codecClass =
                Class.forName(method).asSubclass(CompressionCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, new Configuration());

        File source = new File(filename);
        String target = source.getPath() + codec.getDefaultExtension();

        // Resources close in reverse order: the compression stream is closed first, which
        // flushes the remaining compressed data before the underlying file stream is closed.
        try (FileInputStream in = new FileInputStream(source);
                FileOutputStream rawOut = new FileOutputStream(target);
                CompressionOutputStream encodedOut = codec.createOutputStream(rawOut)) {
            // close=false because try-with-resources owns the streams.
            IOUtils.copyBytes(in, encodedOut, BUFFER_SIZE, false);
        }
    }
}