一. 安装lzop工具
yum install -y svn ncurses-devel
yum install -y gcc gcc-c++ make cmake
yum install -y openssl openssl-devel svn ncurses-devel zlib-devel libtool
yum install -y lzo lzo-devel lzop autoconf automake cmake
二. lzop压缩解压功能验证
lzop -v fileName
lzop -dv fileName.lzo
三. 获取hadoop-lzo
unzip ./hadoop-lzo-master.zip
- 进入hadoop-lzo-master目录,用maven打包,生成jar包
cd ./hadoop-lzo-master
mvn clean package -Dmaven.test.skip=true
- 进入target目录,重命名jar包,并拷贝至Hadoop目录的common目录下
cd ./target/
mv ./hadoop-lzo-0.4.21-SNAPSHOT.jar hadoop-lzo.jar
cp ./hadoop-lzo.jar /opt/soft/hadoop260/share/hadoop/common/
- 停止Hadoop集群,打开core-site.xml文件,添加以下内容
stop-all.sh
cd /opt/soft/hadoop260/etc/hadoop
vi ./core-site.xml
<property>
<name>io.compression.codecs</name>
<value>
org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec,
com.hadoop.compression.lzo.LzoCodec,
com.hadoop.compression.lzo.LzopCodec
</value>
</property>
- 打开mapred-site.xml文件,添加以下内容
vi ./mapred-site.xml
<!-- 开启mr输出时的压缩 -->
<property>
<name>mapreduce.output.fileoutputformat.compress</name>
<value>true</value>
</property>
<property>
<name>mapreduce.output.fileoutputformat.compress.codec</name>
<value>org.apache.hadoop.io.compress.BZip2Codec</value>
</property>
<!-- 开启mr中map阶段的输出压缩 -->
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
</property>
<!-- 指定mr中map阶段的输出压缩为Snappy -->
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
start-all.sh
四. hdfs创建索引测试
- 为hdfs目录下的/tmp/sample.csv.lzo文件创建索引
hadoop jar $HADOOP_HOME/share/hadoop/common/hadoop-lzo.jar \
  com.hadoop.compression.lzo.LzoIndexer /tmp/sample.csv.lzo