1 安装依赖软件
#3台服务器都安装
yum install lzop -y
2 jar包安装
#3台服务器都要拷贝安装
cp hadoop-lzo-0.4.20.jar /data/hadoop/share/hadoop/common/
#查看
[root@master11 01]# ll /data/hadoop/share/hadoop/common/
total 8688
-rw-r--r--. 1 root root 4638196 Mar 4 14:36 hadoop-common-3.4.0.jar
-rw-r--r--. 1 root root 3579389 Mar 4 14:36 hadoop-common-3.4.0-tests.jar
-rw-r--r--. 1 root root 96792 Mar 4 14:38 hadoop-kms-3.4.0.jar
-rw-r--r--. 1 root root 193831 May 15 17:20 hadoop-lzo-0.4.20.jar
-rw-r--r--. 1 root root 170442 Mar 4 14:38 hadoop-nfs-3.4.0.jar
-rw-r--r--. 1 root root 191049 Mar 4 14:39 hadoop-registry-3.4.0.jar
drwxr-xr-x. 2 root root 4096 Mar 4 16:04 jdiff
drwxr-xr-x. 2 root root 8192 Mar 4 14:38 lib
drwxr-xr-x. 2 root root 89 Mar 4 16:05 sources
drwxr-xr-x. 3 root root 20 Mar 4 16:05 webapps
3 本地压缩测试
[root@master11 soft]# ll -h access.log
-rw-r--r--. 1 root root 777M May 15 17:37 access.log
[root@master11 soft]# lzop -v access.log
compressing access.log into access.log.lzo
[root@master11 soft]# ll -h access.log*
-rw-r--r--. 1 root root 777M May 15 17:37 access.log
-rw-r--r--. 1 root root 94M May 15 17:37 access.log.lzo
4 修改配置文件
#3台服务器都要修改,在<configuration>标签内增加如下配置
vi /data/hadoop/etc/hadoop/core-site.xml
<property>
<name>io.compression.codecs</name>
<value>
org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec,
com.hadoop.compression.lzo.LzoCodec,
com.hadoop.compression.lzo.LzopCodec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
5 重新启动集群
stop-all.sh
start-all.sh
6 测试
#上传测试文件
hdfs dfs -put word.txt /test/02/
#执行
hadoop jar /data/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.0.jar wordcount -Dmapreduce.output.fileoutputformat.compress=true -Dmapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzopCodec /test/02/word.txt /test/output3/
#查看
[root@master11 01]# hdfs dfs -ls /test/output3
Found 2 items
-rw-r--r-- 3 root supergroup 0 2024-05-15 21:27 /test/output3/_SUCCESS
-rw-r--r-- 3 root supergroup 89 2024-05-15 21:27 /test/output3/part-r-00000.lzo
#创建lzo索引
hadoop jar /data/hadoop/share/hadoop/common/hadoop-lzo-0.4.20.jar com.hadoop.compression.lzo.LzoIndexer /test/output3/
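#再次执行wordcount(以上一步生成lzo文件的目录作为输入)
#注意:若要让作业按lzo索引进行切片,需额外指定 -Dmapreduce.job.inputformat.class=com.hadoop.mapreduce.LzoTextInputFormat;使用默认输入格式时,.index索引文件也会被当作普通输入文件读取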
hadoop jar /data/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.0.jar wordcount -Dmapreduce.output.fileoutputformat.compress=true -Dmapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzopCodec /test/output3/ /test/output5/
[root@master11 soft]# hdfs dfs -ls /test/output5/
Found 2 items
-rw-r--r-- 3 root supergroup 0 2024-05-15 22:23 /test/output5/_SUCCESS
-rw-r--r-- 3 root supergroup 6098076 2024-05-15 22:23 /test/output5/part-r-00000.lzo