一、准备工作(只需在集群中的一个节点上执行,无需所有节点)
1、maven准备
2、安装lzo
# Build LZO 2.06 from source and install it under a dedicated prefix.
wget http://www.oberhumer.com/opensource/lzo/download/lzo-2.06.tar.gz
tar -zxvf lzo-2.06.tar.gz -C ../
cd lzo-2.06
# Install into /usr/local/lzo206 so the build stays isolated from system libs.
mkdir -p /usr/local/lzo206
# --enable-shared is required: hadoop-lzo links against the shared library.
./configure --enable-shared --prefix=/usr/local/lzo206
make && make install
将/usr/local/lzo206 目录下的所有文件打包,并同步到集群中的所有机器上。
# Pull the built LZO libraries off the build host, then fan them out to every
# node's system library path (dcopy presumably broadcasts to the cluster --
# NOTE(review): confirm dcopy's semantics against its docs).
# BUG FIX: /usr/local/lzo206/lib is a directory; scp needs -r to copy it.
scp -r bis-newdatanode-s2b-80:/usr/local/lzo206/lib /root/tmpdir/lzo
./dcopy -r /root/tmpdir/lzo/lib/\* /usr/lib64
3、设置环境变量
vi /etc/profile
export C_INCLUDE_PATH=/usr/local/lzo206/include
export LIBRARY_PATH=/usr/local/lzo206/lib
二、安装hadoop-lzo
wget https://github.com/twitter/hadoop-lzo/archive/master.zip
unzip master.zip -d ../
cd hadoop-lzo-master
修改pom.xml中的hadoop.current.version值为当前版本2.7.3
# Re-export the LZO paths so the Maven native build can find headers and libs.
export C_INCLUDE_PATH=/usr/local/lzo206/include
export LIBRARY_PATH=/usr/local/lzo206/lib
mvn clean package -Dmaven.test.skip=true
# Install the jar where Hadoop's classpath picks it up.
cp target/hadoop-lzo-0.4.21-SNAPSHOT.jar /home/test/hadoop-2.7.3/share/hadoop/common
# Install the JNI natives both system-wide and into Hadoop's native lib dir.
cp /root/hadoop-lzo-master/target/native/Linux-amd64-64/lib/* /usr/lib64
cp /root/hadoop-lzo-master/target/native/Linux-amd64-64/lib/* /home/test/hadoop-2.7.3/lib/native
# Hadoop runs as user 'test'; give it ownership of the copied jar(s).
chown -R test.test /home/test/hadoop-2.7.3/share/hadoop/common/*
分发至各个节点(以下命令以 slave1 为例,集群中其余节点需逐一重复执行)
scp /home/test/hadoop-2.7.3/share/hadoop/common/hadoop-lzo-0.4.21-SNAPSHOT.jar slave1:/home/test/hadoop-2.7.3/share/hadoop/common/
scp /home/test/hadoop-2.7.3/lib/native/libgplcompression* slave1:/home/test/hadoop-2.7.3/lib/native/
配置 core-site.xml
<property>
<name>io.compression.codecs</name>
<value>
org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,
com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
scp /home/test/hadoop-2.7.3/etc/hadoop/core-site.xml slave1:/home/test/hadoop-2.7.3/etc/hadoop/
验证:
---load lzo文件 to hive table
-- Text table backed by LZO-compressed data files; DeprecatedLzoTextInputFormat
-- lets MapReduce read .lzo files (and split them once an index exists).
CREATE TABLE test_lzo (
    id   INT,
    name STRING
)
STORED AS
    INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
echo -e "2\001xjp">>11.txt
lzop 11.txt
load data local inpath '/home/test/11.txt.lzo' overwrite into table test_lzo;
--hive operator with lzo compression
-- Enable compressed query output for this Hive session.
set hive.exec.compress.output=true;
-- NOTE(review): the mapred.* names are the deprecated Hadoop-1 aliases; on
-- Hadoop 2.x the canonical names are mapreduce.output.fileoutputformat.*,
-- but the old aliases are still honored -- confirm on your cluster.
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzoCodec;
set mapred.output.compression.type=BLOCK;
-- BLOCK-level compression for SequenceFile output (best compression ratio).
set io.seqfile.compression.type=BLOCK;
-- SequenceFile table; with the session settings above, the INSERT produces
-- BLOCK-compressed LZO SequenceFiles.
CREATE TABLE test_lzo5 (
    id   INT,
    name STRING
)
STORED AS SEQUENCEFILE;

INSERT INTO test_lzo5
SELECT * FROM test_lzo;
-- Default-storage (TEXTFILE) table; with hive.exec.compress.output=true the
-- INSERT below writes LZO-compressed text output.
-- BUG FIX: the original ran the CREATE TABLE straight into the INSERT with no
-- terminating ';', which is a Hive syntax error. Terminate the DDL first.
create table test_lzo6(
id int,
name string);
insert into test_lzo6 select * from test_lzo;
附:
---map压缩输出参数
-- Compress intermediate (map-side) output with LZO to shrink shuffle traffic.
set mapred.compress.map.output=true;
set mapred.map.output.compression.codec=com.hadoop.compression.lzo.LzoCodec;
-- FIX: uppercased the value to BLOCK for consistency with every other
-- compression.type setting in this file; SequenceFile.CompressionType
-- values are the uppercase enum names.
set mapred.map.output.compression.type=BLOCK;
---reduce 压缩输出
mapred.output.compress
mapred.output.compression.codec
mapred.output.compression.type=BLOCK
(注:自此行起至文末的内容与上文第 1–77 行完全重复,疑为误粘贴,建议核对后保留其一)
1、maven准备
2、安装lzo
# Build LZO 2.06 from source and install it under a dedicated prefix.
wget http://www.oberhumer.com/opensource/lzo/download/lzo-2.06.tar.gz
tar -zxvf lzo-2.06.tar.gz -C ../
cd lzo-2.06
# Install into /usr/local/lzo206 so the build stays isolated from system libs.
mkdir -p /usr/local/lzo206
# --enable-shared is required: hadoop-lzo links against the shared library.
./configure --enable-shared --prefix=/usr/local/lzo206
make && make install
将/usr/local/lzo206 目录下的所有文件打包,并同步到集群中的所有机器上。
# Pull the built LZO libraries off the build host, then fan them out to every
# node's system library path (dcopy presumably broadcasts to the cluster --
# NOTE(review): confirm dcopy's semantics against its docs).
# BUG FIX: /usr/local/lzo206/lib is a directory; scp needs -r to copy it.
scp -r bis-newdatanode-s2b-80:/usr/local/lzo206/lib /root/tmpdir/lzo
./dcopy -r /root/tmpdir/lzo/lib/\* /usr/lib64
3、设置环境变量
vi /etc/profile
export C_INCLUDE_PATH=/usr/local/lzo206/include
export LIBRARY_PATH=/usr/local/lzo206/lib
二、安装hadoop-lzo
wget https://github.com/twitter/hadoop-lzo/archive/master.zip
unzip master.zip -d ../
cd hadoop-lzo-master
修改pom.xml中的hadoop.current.version值为当前版本2.7.3
# Re-export the LZO paths so the Maven native build can find headers and libs.
export C_INCLUDE_PATH=/usr/local/lzo206/include
export LIBRARY_PATH=/usr/local/lzo206/lib
mvn clean package -Dmaven.test.skip=true
# Install the jar where Hadoop's classpath picks it up.
cp target/hadoop-lzo-0.4.21-SNAPSHOT.jar /home/test/hadoop-2.7.3/share/hadoop/common
# Install the JNI natives both system-wide and into Hadoop's native lib dir.
cp /root/hadoop-lzo-master/target/native/Linux-amd64-64/lib/* /usr/lib64
cp /root/hadoop-lzo-master/target/native/Linux-amd64-64/lib/* /home/test/hadoop-2.7.3/lib/native
# Hadoop runs as user 'test'; give it ownership of the copied jar(s).
chown -R test.test /home/test/hadoop-2.7.3/share/hadoop/common/*
分发至各个节点(以下命令以 slave1 为例,集群中其余节点需逐一重复执行)
scp /home/test/hadoop-2.7.3/share/hadoop/common/hadoop-lzo-0.4.21-SNAPSHOT.jar slave1:/home/test/hadoop-2.7.3/share/hadoop/common/
scp /home/test/hadoop-2.7.3/lib/native/libgplcompression* slave1:/home/test/hadoop-2.7.3/lib/native/
配置 core-site.xml
<property>
<name>io.compression.codecs</name>
<value>
org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,
com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
scp /home/test/hadoop-2.7.3/etc/hadoop/core-site.xml slave1:/home/test/hadoop-2.7.3/etc/hadoop/
验证:
---load lzo文件 to hive table
-- Text table backed by LZO-compressed data files; DeprecatedLzoTextInputFormat
-- lets MapReduce read .lzo files (and split them once an index exists).
CREATE TABLE test_lzo (
    id   INT,
    name STRING
)
STORED AS
    INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
echo -e "2\001xjp">>11.txt
lzop 11.txt
load data local inpath '/home/test/11.txt.lzo' overwrite into table test_lzo;
--hive operator with lzo compression
-- Enable compressed query output for this Hive session.
set hive.exec.compress.output=true;
-- NOTE(review): the mapred.* names are the deprecated Hadoop-1 aliases; on
-- Hadoop 2.x the canonical names are mapreduce.output.fileoutputformat.*,
-- but the old aliases are still honored -- confirm on your cluster.
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzoCodec;
set mapred.output.compression.type=BLOCK;
-- BLOCK-level compression for SequenceFile output (best compression ratio).
set io.seqfile.compression.type=BLOCK;
-- SequenceFile table; with the session settings above, the INSERT produces
-- BLOCK-compressed LZO SequenceFiles.
CREATE TABLE test_lzo5 (
    id   INT,
    name STRING
)
STORED AS SEQUENCEFILE;

INSERT INTO test_lzo5
SELECT * FROM test_lzo;
-- Default-storage (TEXTFILE) table; with hive.exec.compress.output=true the
-- INSERT below writes LZO-compressed text output.
-- BUG FIX: the original ran the CREATE TABLE straight into the INSERT with no
-- terminating ';', which is a Hive syntax error. Terminate the DDL first.
create table test_lzo6(
id int,
name string);
insert into test_lzo6 select * from test_lzo;
附:
---map压缩输出参数
-- Compress intermediate (map-side) output with LZO to shrink shuffle traffic.
set mapred.compress.map.output=true;
set mapred.map.output.compression.codec=com.hadoop.compression.lzo.LzoCodec;
-- FIX: uppercased the value to BLOCK for consistency with every other
-- compression.type setting in this file; SequenceFile.CompressionType
-- values are the uppercase enum names.
set mapred.map.output.compression.type=BLOCK;
---reduce 压缩输出
mapred.output.compress
mapred.output.compression.codec
mapred.output.compression.type=BLOCK