Error message
```
java.lang.NoSuchMethodError: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.initSerdeParams(Lorg/apache/hadoop/conf/Configuration;Ljava/util/Properties;Ljava/lang/String;)Lorg/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe$SerDeParameters;
at org.apache.hadoop.hive.hbase.HBaseSerDeParameters.<init>(HBaseSerDeParameters.java:93)
at org.apache.hadoop.hive.hbase.HBaseSerDe.initialize(HBaseSerDe.java:92)
at org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:53)
at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:521)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:391)
at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:276)
at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:258)
at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:605)
at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getTableOption$1$$anonfun$apply$11.apply(HiveClientImpl.scala:377)
at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getTableOption$1$$anonfun$apply$11.apply(HiveClientImpl.scala:373)
at scala.Option.map(Option.scala:146)
at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getTableOption$1.apply(HiveClientImpl.scala:373)
at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getTableOption$1.apply(HiveClientImpl.scala:371)
at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:290)
at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:231)
at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:230)
at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:273)
at org.apache.spark.sql.hive.client.HiveClientImpl.getTableOption(HiveClientImpl.scala:371)
at org.apache.spark.sql.hive.client.HiveClient$class.getTable(HiveClient.scala:74)
at org.apache.spark.sql.hive.client.HiveClientImpl.getTable(HiveClientImpl.scala:79)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$org$apache$spark$sql$hive$HiveExternalCatalog$$getRawTable$1.apply(HiveExternalCatalog.scala:118)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$org$apache$spark$sql$hive$HiveExternalCatalog$$getRawTable$1.apply(HiveExternalCatalog.scala:118)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
at org.apache.spark.sql.hive.HiveExternalCatalog.org$apache$spark$sql$hive$HiveExternalCatalog$$getRawTable(HiveExternalCatalog.scala:117)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$getTable$1.apply(HiveExternalCatalog.scala:648)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$getTable$1.apply(HiveExternalCatalog.scala:648)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
at org.apache.spark.sql.hive.HiveExternalCatalog.getTable(HiveExternalCatalog.scala:647)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupRelation(SessionCatalog.scala:681)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$lookupTableFromCatalog(Analyzer.scala:640)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.resolveRelation(Analyzer.scala:595)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:625)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:618)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:62)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:62)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:61)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$1.apply(LogicalPlan.scala:59)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$1.apply(LogicalPlan.scala:59)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:59)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:618)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:564)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:69)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:67)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:50)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:66)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:623)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:691)
at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:340)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:248)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:755)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
```
Error background
While integrating Spark with HBase, this error was thrown when I used Spark SQL to query an HBase table that is mapped into Hive as an external table. At first glance it looks like a missing-jar problem, so I checked whether any of the HBase jars were missing from spark/jars, but I could not find anything missing. So I bit the bullet and copied all of the hbase-*.jar files from the HBase installation into the Spark jars directory, along with hive-hbase-handler-1.2.1.jar and htrace-core-2.04.jar. The jars involved are listed below (a sketch of the copy commands follows the list):
- hbase-*.jar
- hive-hbase-handler-1.2.1.jar
- htrace-core-2.04.jar
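Roughly, the copy step looks like this; the source paths below are illustrative only, so adjust them to wherever these jars actually live in your installation:

```bash
# Sketch only: HBASE_HOME and SPARK_HOME are placeholders for your install paths,
# and the source locations are assumptions about a typical layout.
cp $HBASE_HOME/lib/hbase-*.jar $SPARK_HOME/jars/
cp /path/to/hive-hbase-handler-1.2.1.jar $SPARK_HOME/jars/   # wherever this jar lives on your machine
cp $HBASE_HOME/lib/htrace-core-2.04.jar $SPARK_HOME/jars/
```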
Even after copying all of those jars the error persisted, which left me completely baffled, so I went searching online for the cause. Baidu turned up almost nothing like my problem, so I brought out the big gun and searched with Google, which works much better for English-language material and surfaced highly relevant hits. The same question even appears on the Spark mailing list, though without a solution, so clearly quite a few people have run into it. Summarizing those answers, there are basically two explanations: missing jars, or incompatible versions. But I was using exactly the same versions as the instructor in the course video, and if it worked for him it should work for me, so I did not want to swap out my Hive version: that would be a hassle and would mean re-importing all the data. For reference, here are the Spark, HBase, and Hive versions I was using:
| Component | Version |
| --- | --- |
| spark | 2.2.0 |
| hbase | 0.98.6-cdh5.3.0 |
| hive | 0.13.1-cdh5.3.0 |
Since the error was still unresolved and I could not find a good fix online, I tried to figure it out myself. Because the error complains that the method cannot be found, I located the jar that contains the class and ran the following command to check whether the method actually exists:
```bash
javap -classpath hive-serde-0.13.1.jar org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | grep initSerdeParams
```
The output was:
```
public static org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe$SerDeParameters initSerdeParams(org.apache.hadoop.conf.Configuration, java.util.Properties, java.lang.String) throws org.apache.hadoop.hive.serde2.SerDeException;
```
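(As an aside: if you are not sure which jar provides the class in the first place, a quick scan of the jar directories can tell you. The sketch below is illustrative only; HIVE_HOME and SPARK_HOME are placeholders for your install paths.)

```bash
# Sketch: list every jar under the assumed $HIVE_HOME/lib and $SPARK_HOME/jars
# that bundles LazySimpleSerDe, to see which copy of the class is on the classpath.
for jar in $HIVE_HOME/lib/*.jar $SPARK_HOME/jars/*.jar; do
  if unzip -l "$jar" 2>/dev/null | grep -q 'org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe\.class'; then
    echo "$jar"
  fi
done
```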
So the class does have this method after all. I went all in and copied hive-serde-0.13.1.jar into the Spark jars directory as well, and, as if by magic, spark-sql could now read the contents of the HBase table.
Solution
Copy hive-serde-0.13.1.jar from the hive/lib directory into the spark/jars directory (assuming the jars listed earlier have already been copied). A sketch of the command follows.
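For example (HIVE_HOME and SPARK_HOME are placeholders for your actual installation directories):

```bash
# Sketch: adjust HIVE_HOME and SPARK_HOME to your own install paths.
cp $HIVE_HOME/lib/hive-serde-0.13.1.jar $SPARK_HOME/jars/
```

Restart the spark-sql session afterwards so the newly added jar is picked up.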