转载自:http://guoyunsky.iteye.com/blog/1266226
压缩是绕不开的话题,因为当今很多程序的瓶颈仍然在IO。特别是对Hadoop这种分布式存储和运算框架来说,单台机器的IO和网络通信IO都是压力和挑战。关于Hadoop为什么选用Lzo来压缩而没有选用传统的压缩方法,这里不再阐述,相关资料很多,有兴趣的可以查看Cloudera的这篇博客:http://www.cloudera.com/blog/2009/11/hadoop-at-twitter-part-1-splittable-lzo-compression/
下面的代码只演示如何读写lzo文件,具体请看代码。
- package com.guoyun.hadoop.io.study;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;

import com.hadoop.compression.lzo.LzopCodec;
- /**
- * 读写Lzo文件
- */
- public class LzoFileStudy {
- private static Configuration getDefaultConf(){
- Configuration conf=new Configuration();
- conf.set("mapred.job.tracker", "local");
- conf.set("fs.default.name", "file:///");
- conf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzoCodec");
- return conf;
- }
- /**
- * 写入数据到lzo文件
- *
- * @param destLzoFilePath
- * @param conf
- * @param datas
- */
- public static void write2LzoFile(String destLzoFilePath,Configuration conf,byte[] datas){
- LzopCodec lzo=null;
- OutputStream out=null;
- try {
- lzo=new LzopCodec();
- lzo.setConf(conf);
- out=lzo.createOutputStream(new FileOutputStream(destLzoFilePath));
- out.write(datas);
- } catch (FileNotFoundException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }finally{
- try {
- if(out!=null){
- out.close();
- }
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
- /**
- * 从lzo文件中读取数据
- *
- * @param lzoFilePath
- * @param conf
- * @return
- */
- public static List<String> readLzoFile(String lzoFilePath,Configuration conf){
- LzopCodec lzo=null;
- InputStream is=null;
- InputStreamReader isr=null;
- BufferedReader reader=null;
- List<String> result=null;
- String line=null;
- try {
- lzo=new LzopCodec();
- lzo.setConf(conf);
- is=lzo.createInputStream(new FileInputStream(lzoFilePath));
- isr=new InputStreamReader(is);
- reader=new BufferedReader(isr);
- result=new ArrayList<String>();
- while((line=reader.readLine())!=null){
- result.add(line);
- }
- } catch (FileNotFoundException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }finally{
- try {
- if(reader!=null){
- reader.close();
- }
- if(isr!=null){
- isr.close();
- }
- if(is!=null){
- is.close();
- }
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- return result;
- }
- /**
- * @param args
- */
- public static void main(String[] args) {
- // 生成数据
- String dataSource="abcdefghijklmnopqrstuvwxyz0123456789~!@#¥%……&*()——+\r";
- dataSource=dataSource.concat(dataSource);
- dataSource=dataSource.concat(dataSource);
- dataSource=dataSource.concat(dataSource);
- String lzoFilePath="./data/test.lzo";
- // 写入到lzo文件
- write2LzoFile(lzoFilePath,getDefaultConf(),dataSource.getBytes());
- StringBuilder sb=new StringBuilder();
- // 读取lzo文件
- List<String> lines=readLzoFile(lzoFilePath,getDefaultConf());
- for(String line:lines){
- sb.append(line);
- sb.append("\r");
- }
- // 数据是否一致
- if(sb.toString().equals(dataSource)){
- System.out.println(sb.toString());
- }else{
- System.err.println("Error line:"+sb.toString());
- }
- }
- }