CompressionCodec 有两个方法可以用于轻松地压缩或解压缩数据。要想对正在被写入一个输出流的数据进行压缩,我们可以使用 createOutputStream(OutputStream out) 方法创建一个 CompressionOutputStream,将数据以压缩格式写入底层的流。相反,要想对从输入流读取而来的数据进行解压缩,则调用 createInputStream(InputStream in) 方法,获得一个 CompressionInputStream,从而从底层的流读取解压缩后的数据。下面测试如下几种压缩方式:
|
package com.xmnzdx.mapreduce.wordcount.compression;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * Small demo of the Hadoop {@link CompressionCodec} API on local files:
 * compresses a file with a codec chosen by class name, and decompresses a
 * file with a codec inferred from its file name.
 */
public class TestCompression {

    /** Buffer size, in bytes, handed to {@link IOUtils#copyBytes} (5 MiB). */
    private static final int BUFFER_SIZE = 1024 * 1024 * 5;

    /**
     * Entry point. Decompresses {@code d:\hello.txt.bz2}; the commented-out
     * calls show how to compress {@code d:\hello.txt} with different codecs.
     *
     * @param args ignored
     * @throws ClassNotFoundException declared for the compression examples
     * @throws IOException if reading or writing a file fails
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException {
        // Compression examples:
        // compression("d:\\hello.txt","org.apache.hadoop.io.compress.DefaultCodec");
        // compression("d:\\hello.txt","org.apache.hadoop.io.compress.GzipCodec");
        // compression("d:\\hello.txt","org.apache.hadoop.io.compress.BZip2Codec");

        // Decompression:
        decompression("d:\\hello.txt.bz2");
    }

    /**
     * Decompresses a file into {@code compFileName + ".decoded"}.
     *
     * <p>The codec is looked up from the file name via
     * {@link CompressionCodecFactory#getCodec(Path)}; if none is found a
     * message is printed and nothing is written.
     *
     * @param compFileName path of the compressed input file
     * @throws IOException if opening, reading or writing a stream fails
     */
    private static void decompression(String compFileName) throws IOException {
        // 0. Check whether a codec is available for this file.
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = factory.getCodec(new Path(compFileName));
        if (codec == null) {
            System.out.println("cannot find codec for file"+compFileName);
            return;
        }

        // 1-2. Open the input and output streams. try-with-resources closes
        // them in reverse order (fos, cis, fis) even if the copy fails.
        try (FileInputStream fis = new FileInputStream(compFileName);
             CompressionInputStream cis = codec.createInputStream(fis);
             FileOutputStream fos = new FileOutputStream(compFileName + ".decoded")) {
            // 3. Copy the decompressed bytes to the output file.
            IOUtils.copyBytes(cis, fos, BUFFER_SIZE);
        }
    }

    /**
     * Compresses a file into {@code filename + codec.getDefaultExtension()}.
     *
     * @param filename path of the original (uncompressed) file
     * @param code fully qualified class name of the {@link CompressionCodec} to use
     * @throws ClassNotFoundException if {@code code} cannot be loaded
     * @throws ClassCastException if {@code code} does not name a {@link CompressionCodec}
     * @throws IOException if opening, reading or writing a stream fails
     */
    private static void compression(String filename, String code) throws ClassNotFoundException, IOException {
        // Resolve the codec before opening any file so that a bad class name
        // cannot leave an open file handle behind.
        Class<? extends CompressionCodec> codecClass =
                Class.forName(code).asSubclass(CompressionCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, new Configuration());

        // The output file name is the original name plus the codec's default
        // extension. Resources are closed in reverse order (cos, fos, fis);
        // closing cos first lets the compressed stream finish before the
        // underlying file stream is closed.
        try (FileInputStream fis = new FileInputStream(filename);
             FileOutputStream fos = new FileOutputStream(new File(filename + codec.getDefaultExtension()));
             CompressionOutputStream cos = codec.createOutputStream(fos)) {
            // Copy the raw bytes through the compressing stream.
            IOUtils.copyBytes(fis, cos, BUFFER_SIZE);
        }
    }
}