使用Scala开发Spark程序的时候, 发现如下问题
java.io.IOException: HADOOP_HOME or hadoop.home.dir are not set.
13:45:08.277 [main] DEBUG o.a.h.m.impl.MetricsSystemImpl - UgiMetrics, User and group related metrics
13:45:08.303 [main] DEBUG o.a.h.security.UserGroupInformation - PrivilegedAction as:hdfs (auth:SIMPLE) from:org.apache.hadoop.fs.FileSystem.get(FileSystem.java:153)
13:45:08.819 [main] DEBUG o.a.hadoop.hdfs.BlockReaderLocal - dfs.client.use.legacy.blockreader.local = false
13:45:08.822 [main] DEBUG o.a.hadoop.hdfs.BlockReaderLocal - dfs.client.read.shortcircuit = false
13:45:08.822 [main] DEBUG o.a.hadoop.hdfs.BlockReaderLocal - dfs.client.domain.socket.data.traffic = false
13:45:08.822 [main] DEBUG o.a.hadoop.hdfs.BlockReaderLocal - dfs.domain.socket.path =
13:45:08.879 [main] DEBUG o.a.h.m.impl.MetricsSystemImpl - StartupProgress, NameNode startup progress
13:45:09.046 [main] DEBUG o.apache.hadoop.io.retry.RetryUtils - multipleLinearRandomRetry = null
13:45:09.145 [main] DEBUG org.apache.hadoop.security.Groups - Creating new Groups object
13:45:09.149 [main] DEBUG o.a.hadoop.util.NativeCodeLoader - Trying to load the custom-built native-hadoop library...
13:45:09.151 [main] DEBUG o.a.hadoop.util.NativeCodeLoader - Failed to load native-hadoop with error: java.lang.UnsatisfiedLinkError: no hadoop in java.library.path
13:45:09.151 [main] DEBUG o.a.hadoop.util.NativeCodeLoader - java.library.path=/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
13:45:09.151 [main] WARN o.a.hadoop.util.NativeCodeLoader - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
13:45:09.152 [main] DEBUG o.a.h.s.JniBasedUnixGroupsMappingWithFallback - Falling back to shell based
13:45:09.155 [main] DEBUG o.a.h.s.JniBasedUnixGroupsMappingWithFallback - Group mapping impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping
13:45:09.156 [main] DEBUG org.apache.hadoop.security.Groups - Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback; cacheTimeout=300000
13:45:09.182 [main] DEBUG org.apache.hadoop.ipc.Server - rpcKind=RPC_PROTOCOL_BUFFER, rpcRequestWrapperClass=class org.apache.hadoop.ipc.ProtobufRpcEngine$RpcRequestWrapper, rpcInvoker=org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker@516be40f
13:45:09.621 [main] DEBUG o.a.hadoop.hdfs.BlockReaderLocal - Both short-circuit local reads and UNIX domain socket are disabled.
13:45:09.646 [main] DEBUG org.apache.hadoop.util.Shell - Failed to detect a valid hadoop home directory
java.io.IOException: HADOOP_HOME or hadoop.home.dir are not set.
at org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:225) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:250) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.conf.Configuration.getTrimmedStrings(Configuration.java:1546) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:519) [hadoop-hdfs-2.2.0.jar:na]
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:453) [hadoop-hdfs-2.2.0.jar:na]
at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:136) [hadoop-hdfs-2.2.0.jar:na]
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2433) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:88) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2467) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2449) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.fs.FileSystem$1.run(FileSystem.java:156) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.fs.FileSystem$1.run(FileSystem.java:153) [hadoop-common-2.2.0.jar:na]
at java.security.AccessController.doPrivileged(Native Method) [na:1.8.0_162]
at javax.security.auth.Subject.doAs(Subject.java:422) [na:1.8.0_162]
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491) [hadoop-common-2.2.0.jar:na]
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:153) [hadoop-common-2.2.0.jar:na]
at info.aoye.WordSeg.HadoopFileIOAdapter.open(HadoopFileIOAdapter.scala:13) [wordseg.jar:na]
at com.hankcs.hanlp.corpus.io.ByteArrayStream.createByteArrayStream(ByteArrayStream.java:42) [hanlp-portable-1.6.8.jar:na]
at com.hankcs.hanlp.model.perceptron.model.LinearModel.load(LinearModel.java:387) [hanlp-portable-1.6.8.jar:na]
at com.hankcs.hanlp.model.crf.LogLinearModel.<init>(LogLinearModel.java:92) [hanlp-portable-1.6.8.jar:na]
at com.hankcs.hanlp.model.crf.CRFTagger.<init>(CRFTagger.java:41) [hanlp-portable-1.6.8.jar:na]
at com.hankcs.hanlp.model.crf.CRFSegmenter.<init>(CRFSegmenter.java:47) [hanlp-portable-1.6.8.jar:na]
at com.hankcs.hanlp.model.crf.CRFSegmenter.<init>(CRFSegmenter.java:42) [hanlp-portable-1.6.8.jar:na]
at com.hankcs.hanlp.model.crf.CRFLexicalAnalyzer.<init>(CRFLexicalAnalyzer.java:104) [hanlp-portable-1.6.8.jar:na]
at info.aoye.WordSeg.Demo2$.main(Demo2.scala:13) [wordseg.jar:na]
at info.aoye.WordSeg.Demo2.main(Demo2.scala) [wordseg.jar:na]
13:45:09.798 [main] DEBUG org.apache.hadoop.util.Shell - setsid exited with exit code 0
13:45:09.855 [main] DEBUG org.apache.hadoop.ipc.Client - The ping interval is 60000 ms.
13:45:09.858 [main] DEBUG org.apache.hadoop.ipc.Client - Connecting to localhost/127.0.0.1:8020
13:45:09.894 [main] DEBUG org.apache.hadoop.ipc.Client - closing ipc connection to localhost/127.0.0.1:8020: 拒绝连接
解决方法:
在代码中添加如下内容: System.setProperty("hadoop.home.dir", "/usr/hdp/2.6.4.0-91/hadoop")
注意: 从上面的堆栈可以看到, 该属性是在 org.apache.hadoop.util.Shell 的静态初始化块 (Shell.&lt;clinit&gt;) 中读取的, 因此必须在任何 Hadoop 类被加载之前(即 main 方法的最开始)设置, 否则设置无效。
package info.aoye.WordSeg
import com.hankcs.hanlp.HanLP
import com.hankcs.hanlp.model.crf.CRFLexicalAnalyzer
object Demo2 {

  /** Demo entry point: segments a sample sentence with HanLP's CRF lexical
    * analyzer, loading model files through a Hadoop-backed IO adapter.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Must be set BEFORE any Hadoop class is touched: Shell.<clinit> reads
    // hadoop.home.dir in its static initializer (see the stack trace above);
    // otherwise "HADOOP_HOME or hadoop.home.dir are not set" is thrown.
    System.setProperty("hadoop.home.dir", "/usr/hdp/2.6.4.0-91/hadoop")

    // Route HanLP model/file loading through HDFS instead of the local FS.
    HanLP.Config.IOAdapter = new HadoopFileIOAdapter

    val segment = new CRFLexicalAnalyzer()

    // Bug fix: `str` was referenced but never defined in the original
    // snippet, which would not compile. Provide a sample input sentence.
    val str = "商品和服务"
    println(segment.seg(str))
  }
}