1. 前提 概要 (需要软件包)
hadoop 版本 1.0.3 已经安装完成 ,可以运行
操作系统 centos 6.3 64位
2. 安装lzo 相关库及工具
yum install lzo-devel.x86_64 lzop.x86_64 gcc --nogpgcheck
3. 安装ant工具
删除旧版本:yum remove ant
[root@master data]# tar xzvf apache-ant-1.8.4-bin.tar.gz
[root@master data]# ls
apache-ant-1.8.4 apache-ant-1.8.4-bin.tar.gz hadoop-lzo-master hadoop-lzo-master.zip jdk-7u11-linux-x64.rpm
[root@master data]# cd apache-ant-1.8.4
[root@master apache-ant-1.8.4]# pwd
/data/apache-ant-1.8.4
[root@master apache-ant-1.8.4]#
a. vim /etc/profile
在最后添加(只要在master上配置就可以了)
export ANT_HOME=/data/apache-ant-1.8.4
b. vim /etc/profile.d/java.sh #java.sh 我新建 用于配置hadoop java 环境
export JAVA_HOME=/usr/java/jdk1.7.0_11
export JRE_HOME=$JAVA_HOME/jre
export HADOOP_HOME=/opt/modules/hadoop/hadoop-1.0.3
export HBASE_HOME=$HADOOP_HOME/hbase
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HADOOP_HOME/lib
export HBASE_CLASSPATH=$HBASE_CLASSPATH:$HBASE_HOME/lib
export CLASSPATH=.:$HADOOP_HOME/lib:$HBASE_HOME/lib:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$ANT_HOME/bin:$HADOOP_HOME/bin:$HBASE_HOME/bin:$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH
[root@master apache-ant-1.8.4]# source /etc/profile #使java.sh立即生效
4. 编译 hadoop-lzo , 部署库
[root@master data]# unzip hadoop-lzo-master.zip
[root@master data]# ls
apache-ant-1.8.4 apache-ant-1.8.4-bin.tar.gz hadoop-lzo-master hadoop-lzo-master.zip jdk-7u11-linux-x64.rpm
[root@master data]# cd hadoop-lzo-master
[root@master hadoop-lzo-master]# pwd
/data/hadoop-lzo-master
[root@master hadoop-lzo-master]# export JAVA_HOME=/usr/java/jdk1.7.0_11
[root@master hadoop-lzo-master]# export CFLAGS=-m64 #如果操作系统是64位的需要这个指定
[root@master hadoop-lzo-master]# export CXXFLAGS=-m64
[root@master hadoop-lzo-master]# ant compile-native tar #编译如果出错,自己从网上找解决方案,很多
[root@master hadoop-lzo-master]# cp build/hadoop-lzo-0.4.17-SNAPSHOT.jar /opt/modules/hadoop/hadoop-1.0.3/lib/
[root@master hadoop-lzo-master]# tar -cBf - -C build/native . | tar -xBvf - -C /opt/modules/hadoop/hadoop-1.0.3/lib/native
删除$HADOOP_HOME/lib目录下的hadoop-gpl-compression-0.1.0.jar
cd /data/hadoop-lzo-master/build/native/Linux-amd64-64/lib
cp * $HADOOP_HOME/lib/native/Linux-amd64-64
[root@master hadoop-lzo-master]# chown -R hadoop:hadoop /opt/modules/hadoop/hadoop-1.0.3/lib/
5. 修改配置文件
a.#在hadoop-env.sh 添加,修改 这两行
[hadoop@master bin]$ cd /opt/modules/hadoop/hadoop-1.0.3/conf
#Lzo,Snappy,gzip 等压缩算法库地址
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native/Linux-amd64-64
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HADOOP_HOME/lib/hadoop-lzo-0.4.17-SNAPSHOT.jar
b.# 在core-site.xml中添加
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
c.#在mapred-site.xml中添加
<property>
<name>mapred.compress.map.output</name>
<value>true</value>
<!-- map 和 reduce 输出中间文件默认开启压缩 -->
</property>
<property>
<name>mapred.map.output.compression.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
<!-- 使用 Lzo 库作为压缩算法 -->
</property>
6. 测试
[hadoop@master hadoop-1.0.3]$ cd /opt/modules/hadoop/hadoop-1.0.3
[hadoop@master hadoop-1.0.3]$ hadoop fs -put build.xml /user/hadoop/input/
当看到第二行 lzo.GPLNativeCodeLoader: Loaded native gpl library
恭喜你 安装成功
7. 安装hadoop-lzo的一些报错记录
a.
执行 ant compile-native tar 报错
错误: error getting http://repo2.maven.org/maven2/org/apache/ivy/ivy/2.2.0/ivy-2.2.0.jar to .....
因无法连接到外网,所以无法从build.xml里指定的url下载所需包
在内网搭建服务器,将报错提示的软件包放入服务器目录中,并修改build.xml中ivy_repo_url。
同时 ivysetting.xml 中的 <property name="repo.maven.org" value="http://repo1.maven.org/maven2/" />
其 value 值也需修改成自定义服务器地址
重新执行 ant compile-native tar
报错:commons-logging#commons-logging;1.0.4: configuration not found in commons-logging#commons-logging;1.0.4:
'master'. It was required from com.hadoop.gplcompression#Hadoop-GPL-Compression;0.4.15 commons-logging
修改 ivy.xml 中的 conf="commons-logging->master",将 master 改成 default
重新执行 ant compile-native tar成功
aa.
compile-java:
[javac] /data/hadoop-lzo-master/build.xml:243: warning: 'includeantruntime' was not set, defaulting to build.sysclasspath=last; set to false for repeatable builds
check-native-uptodate:
compile-native:
[javah] 错误: 找不到类org.apache.hadoop.conf.Configuration。
BUILD FAILED
/data/hadoop-lzo-master/build.xml:269: compilation failed
解决方法:
在 build.xml 里面将
<target name="compile-java" depends="init">
<javac
encoding="${build.encoding}"
srcdir="${java.src.dir}"
includes="**/*.java"
destdir="${build.classes}"
debug="${javac.debug}"
optimize="${javac.optimize}"
target="${javac.version}"
source="${javac.version}"
deprecation="${javac.deprecation}">
<compilerarg line="${javac.args} ${javac.args.warnings}" />
<classpath refid="classpath"/>
</javac>
<copy todir="${build.classes}">
<fileset dir="${java.src.dir}" includes="**/*.properties"/>
</copy>
</target>
加入一行
<target name="compile-java" depends="init">
<javac
includeantruntime="false"
encoding="${build.encoding}"
srcdir="${java.src.dir}"
includes="**/*.java"
destdir="${build.classes}"
debug="${javac.debug}"
optimize="${javac.optimize}"
target="${javac.version}"
source="${javac.version}"
deprecation="${javac.deprecation}">
<compilerarg line="${javac.args} ${javac.args.warnings}" />
<classpath refid="classpath"/>
</javac>
<copy todir="${build.classes}">
<fileset dir="${java.src.dir}" includes="**/*.properties"/>
</copy>
</target>
b.
compile-native:
[javah] 错误: 找不到类org.apache.hadoop.conf.Configuration。
BUILD FAILED
/data/hadoop-lzo-master/build.xml:270: compilation failed
解决方法:
在下面代码
<javah classpath="${build.classes}"
destdir="${build.native}/src/com/hadoop/compression/lzo"
force="yes"
verbose="yes">
<class name="com.hadoop.compression.lzo.LzoCompressor" />
<class name="com.hadoop.compression.lzo.LzoDecompressor" />
</javah>
加入一行
<javah classpath="${build.classes}"
destdir="${build.native}/src/com/hadoop/compression/lzo"
force="yes"
verbose="yes">
<class name="com.hadoop.compression.lzo.LzoCompressor" />
<class name="com.hadoop.compression.lzo.LzoDecompressor" />
<classpath refid="classpath" />
</javah>
c.
[hadoop@master hadoop]$ hadoop jar hadoop-examples-1.2.1.jar grep input output 'dfs[a-z].+'
13/10/29 01:31:00 ERROR lzo.GPLNativeCodeLoader: Could not load native gpl library
java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1886)
at java.lang.Runtime.loadLibrary0(Runtime.java:849)
at java.lang.System.loadLibrary(System.java:1088)
at com.hadoop.compression.lzo.GPLNativeCodeLoader.<clinit>(GPLNativeCodeLoader.java:32)
at com.hadoop.compression.lzo.LzoCodec.<clinit>(LzoCodec.java:71)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:810)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:109)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:156)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:38)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117)
at org.apache.hadoop.mapred.JobConf.getInputFormat(JobConf.java:596)
at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:1081)
at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1073)
at org.apache.hadoop.mapred.JobClient.access$700(JobClient.java:179)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:983)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:936)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:936)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:910)
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1353)
at org.apache.hadoop.examples.Grep.run(Grep.java:69)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
at org.apache.hadoop.examples.Grep.main(Grep.java:93)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:68)
at org.apache.hadoop.util.ProgramDriver.driver(ProgramDriver.java:139)
at org.apache.hadoop.examples.ExampleDriver.main(ExampleDriver.java:64)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:160)
13/10/29 01:31:00 ERROR lzo.LzoCodec: Cannot load native-lzo without native-hadoop
13/10/29 01:31:00 INFO mapred.FileInputFormat: Total input paths to process : 1
13/10/29 01:31:00 INFO mapred.JobClient: Running job: job_201310290129_0001
13/10/29 01:31:01 INFO mapred.JobClient: map 0% reduce 0%
解决方案:
cp /data/hadoop-lzo-master/build/native/Linux-amd64-64/lib/* $HADOOP_HOME/lib/native/Linux-amd64-64
下载地址:
http://rpm.pbone.net/index.php3/stat/4/idpl/12077295/dir/redhat_el_5/com/lzo2-devel-2.02-3.el5.rf.x86_64.rpm.html