环境:
win10 64位,联想工作站台式
idea 2018.3社区免费版
问题:alsModel.recommendForAllUsers(2).show报错如下
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task not serializable: java.io.NotSerializableException: scala.reflect.api.TypeTags$PredefTypeCreator
Serialization stack:
- object not serializable (class: scala.reflect.api.TypeTags$PredefTypeCreator, value: scala.reflect.api.TypeTags$PredefTypeCreator@2b43acd5)
- writeObject data (class: scala.reflect.api.SerializedTypeTag)
- object (class scala.reflect.api.SerializedTypeTag, scala.reflect.api.SerializedTypeTag@7454b37c)
- writeReplace data (class: scala.reflect.api.SerializedTypeTag)
- object (class scala.reflect.api.TypeTags$PredefTypeTag, TypeTag[Int])
- field (class: org.apache.spark.ml.recommendation.TopByKeyAggregator, name: org$apache$spark$ml$recommendation$TopByKeyAggregator$$evidence$2, type: interface scala.reflect.api.TypeTags$TypeTag)
- object (class org.apache.spark.ml.recommendation.TopByKeyAggregator, org.apache.spark.ml.recommendation.TopByKeyAggregator@7e69bf78)
- field (class: org.apache.spark.sql.execution.aggregate.ComplexTypedAggregateExpression, name: aggregator, type: class org.apache.spark.sql.expressions.Aggregator)
- object (class org.apache.spark.sql.execution.aggregate.ComplexTypedAggregateExpression, TopByKeyAggregator(scala.Tuple3))
- field (class: org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression, name: aggregateFunction, type: class org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction)
- object (class org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression, partial_topbykeyaggregator(org.apache.spark.ml.recommendation.TopByKeyAggregator@7e69bf78, Some(newInstance(class scala.Tuple3)), Some(class scala.Tuple3), Some(StructType(StructField(_1,IntegerType,false), StructField(_2,IntegerType,false), StructField(_3,FloatType,false))), encodeusingserializer(input[0, java.lang.Object, true], true) AS value#86, decodeusingserializer(input[0, binary, true], org.apache.spark.util.BoundedPriorityQueue, true), mapobjects(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._1, _2, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._2), input[0, [Lscala.Tuple2;, true], None) AS value#85, ArrayType(StructType(StructField(_1,IntegerType,false), StructField(_2,FloatType,false)),true), true, 0, 0))
- element of array (index: 0)
- array (class [Ljava.lang.Object;, size 1)
- field (class: scala.collection.mutable.ArrayBuffer, name: array, type: class [Ljava.lang.Object;)
- object (class scala.collection.mutable.ArrayBuffer, ArrayBuffer(partial_topbykeyaggregator(org.apache.spark.ml.recommendation.TopByKeyAggregator@7e69bf78, Some(newInstance(class scala.Tuple3)), Some(class scala.Tuple3), Some(StructType(StructField(_1,IntegerType,false), StructField(_2,IntegerType,false), StructField(_3,FloatType,false))), encodeusingserializer(input[0, java.lang.Object, true], true) AS value#86, decodeusingserializer(input[0, binary, true], org.apache.spark.util.BoundedPriorityQueue, true), mapobjects(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._1, _2, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._2), input[0, [Lscala.Tuple2;, true], None) AS value#85, ArrayType(StructType(StructField(_1,IntegerType,false), StructField(_2,FloatType,false)),true), true, 0, 0)))
- field (class: org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec, name: aggregateExpressions, type: interface scala.collection.Seq)
- object (class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec, ObjectHashAggregate(keys=[value#84], functions=[partial_topbykeyaggregator(org.apache.spark.ml.recommendation.TopByKeyAggregator@7e69bf78, Some(newInstance(class scala.Tuple3)), Some(class scala.Tuple3), Some(StructType(StructField(_1,IntegerType,false), StructField(_2,IntegerType,false), StructField(_3,FloatType,false))), encodeusingserializer(input[0, java.lang.Object, true], true) AS value#86, decodeusingserializer(input[0, binary, true], org.apache.spark.util.BoundedPriorityQueue, true), mapobjects(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._1, _2, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._2), input[0, [Lscala.Tuple2;, true], None) AS value#85, ArrayType(StructType(StructField(_1,IntegerType,false), StructField(_2,FloatType,false)),true), true, 0, 0)], output=[value#84, buf#120])
+- AppendColumnsWithObject <function1>, [assertnotnull(input[0, scala.Tuple3, true])._1 AS _1#76, assertnotnull(input[0, scala.Tuple3, true])._2 AS _2#77, assertnotnull(input[0, scala.Tuple3, true])._3 AS _3#78], [input[0, int, false] AS value#84]
+- MapPartitions <function1>, obj#75: scala.Tuple3
+- DeserializeToObject newInstance(class scala.Tuple2), obj#74: scala.Tuple2
+- CartesianProduct
:- *(2) SerializeFromObject [mapobjects(MapObjects_loopValue5, MapObjects_loopIsNull5, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue5, MapObjects_loopIsNull5, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue5, MapObjects_loopIsNull5, ObjectType(class scala.Tuple2), true))._1, _2, staticinvoke(class org.apache.spark.sql.catalyst.expressions.UnsafeArrayData, ArrayType(FloatType,false), fromPrimitiveArray, assertnotnull(lambdavariable(MapObjects_loopValue5, MapObjects_loopIsNull5, ObjectType(class scala.Tuple2), true))._2, true, false)), input[0, scala.collection.Seq, true], None) AS value#58]
: +- MapPartitions <function1>, obj#57: scala.collection.Seq
: +- DeserializeToObject newInstance(class scala.Tuple2), obj#56: scala.Tuple2
: +- *(1) Project [_1#35 AS id#38, _2#36 AS features#39]
: +- *(1) SerializeFromObject [assertnotnull(input[0, scala.Tuple2, true])._1 AS _1#35, staticinvoke(class org.apache.spark.sql.catalyst.expressions.UnsafeArrayData, ArrayType(FloatType,false), fromPrimitiveArray, assertnotnull(input[0, scala.Tuple2, true])._2, true, false) AS _2#36]
: +- Scan ExternalRDDScan[obj#34]
+- *(4) SerializeFromObject [mapobjects(MapObjects_loopValue8, MapObjects_loopIsNull8, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue8, MapObjects_loopIsNull8, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue8, MapObjects_loopIsNull8, ObjectType(class scala.Tuple2), true))._1, _2, staticinvoke(class org.apache.spark.sql.catalyst.expressions.UnsafeArrayData, ArrayType(FloatType,false), fromPrimitiveArray, assertnotnull(lambdavariable(MapObjects_loopValue8, MapObjects_loopIsNull8, ObjectType(class scala.Tuple2), true))._2, true, false)), input[0, scala.collection.Seq, true], None) AS value#65]
+- MapPartitions <function1>, obj#64: scala.collection.Seq
+- DeserializeToObject newInstance(class scala.Tuple2), obj#63: scala.Tuple2
+- *(3) Project [_1#45 AS id#48, _2#46 AS features#49]
+- *(3) SerializeFromObject [assertnotnull(input[0, scala.Tuple2, true])._1 AS _1#45, staticinvoke(class org.apache.spark.sql.catalyst.expressions.UnsafeArrayData, ArrayType(FloatType,false), fromPrimitiveArray, assertnotnull(input[0, scala.Tuple2, true])._2, true, false) AS _2#46]
+- Scan ExternalRDDScan[obj#44]
)
- field (class: org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1, name: $outer, type: class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec)
- object (class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1, <function0>)
- field (class: org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1$$anonfun$2, name: $outer, type: class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1)
- object (class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1$$anonfun$2, <function2>)
- field (class: org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1, name: f$22, type: interface scala.Function2)
- object (class org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1, <function0>)
- field (class: org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1$$anonfun$apply$24, name: $outer, type: class org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1)
- object (class org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1$$anonfun$apply$24, <function3>)
- field (class: org.apache.spark.rdd.MapPartitionsRDD, name: f, type: interface scala.Function3)
- object (class org.apache.spark.rdd.MapPartitionsRDD, MapPartitionsRDD[273] at show at cs.scala:206)
- field (class: org.apache.spark.NarrowDependency, name: _rdd, type: class org.apache.spark.rdd.RDD)
- object (class org.apache.spark.OneToOneDependency, org.apache.spark.OneToOneDependency@ed55d3e)
- writeObject data (class: scala.collection.immutable.List$SerializationProxy)
- object (class scala.collection.immutable.List$SerializationProxy, scala.collection.immutable.List$SerializationProxy@48a035e5)
- writeReplace data (class: scala.collection.immutable.List$SerializationProxy)
- object (class scala.collection.immutable.$colon$colon, List(org.apache.spark.OneToOneDependency@ed55d3e))
- field (class: org.apache.spark.rdd.RDD, name: org$apache$spark$rdd$RDD$$dependencies_, type: interface scala.collection.Seq)
- object (class org.apache.spark.rdd.MapPartitionsRDD, MapPartitionsRDD[274] at show at cs.scala:206)
- field (class: scala.Tuple2, name: _1, type: class java.lang.Object)
- object (class scala.Tuple2, (MapPartitionsRDD[274] at show at cs.scala:206,org.apache.spark.ShuffleDependency@52f57176))
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler.submitMissingTasks(DAGScheduler.scala:1043)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:947)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$submitStage$4.apply(DAGScheduler.scala:950)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$submitStage$4.apply(DAGScheduler.scala:949)
at scala.collection.immutable.List.foreach(List.scala:383)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:949)
at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:891)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1777)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2048)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2067)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:363)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3272)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2484)
at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2484)
at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3253)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3252)
at org.apache.spark.sql.Dataset.head(Dataset.scala:2484)
at org.apache.spark.sql.Dataset.take(Dataset.scala:2698)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
at org.apache.spark.sql.Dataset.show(Dataset.scala:723)
at org.apache.spark.sql.Dataset.show(Dataset.scala:682)
at org.apache.spark.sql.Dataset.show(Dataset.scala:691)
at cs$.hh(cs.scala:206)
at cs$.main(cs.scala:38)
at cs.main(cs.scala)
Caused by: java.io.NotSerializableException: scala.reflect.api.TypeTags$PredefTypeCreator
Serialization stack:
- object not serializable (class: scala.reflect.api.TypeTags$PredefTypeCreator, value: scala.reflect.api.TypeTags$PredefTypeCreator@2b43acd5)
- writeObject data (class: scala.reflect.api.SerializedTypeTag)
- object (class scala.reflect.api.SerializedTypeTag, scala.reflect.api.SerializedTypeTag@7454b37c)
- writeReplace data (class: scala.reflect.api.SerializedTypeTag)
- object (class scala.reflect.api.TypeTags$PredefTypeTag, TypeTag[Int])
- field (class: org.apache.spark.ml.recommendation.TopByKeyAggregator, name: org$apache$spark$ml$recommendation$TopByKeyAggregator$$evidence$2, type: interface scala.reflect.api.TypeTags$TypeTag)
- object (class org.apache.spark.ml.recommendation.TopByKeyAggregator, org.apache.spark.ml.recommendation.TopByKeyAggregator@7e69bf78)
- field (class: org.apache.spark.sql.execution.aggregate.ComplexTypedAggregateExpression, name: aggregator, type: class org.apache.spark.sql.expressions.Aggregator)
- object (class org.apache.spark.sql.execution.aggregate.ComplexTypedAggregateExpression, TopByKeyAggregator(scala.Tuple3))
- field (class: org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression, name: aggregateFunction, type: class org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction)
- object (class org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression, partial_topbykeyaggregator(org.apache.spark.ml.recommendation.TopByKeyAggregator@7e69bf78, Some(newInstance(class scala.Tuple3)), Some(class scala.Tuple3), Some(StructType(StructField(_1,IntegerType,false), StructField(_2,IntegerType,false), StructField(_3,FloatType,false))), encodeusingserializer(input[0, java.lang.Object, true], true) AS value#86, decodeusingserializer(input[0, binary, true], org.apache.spark.util.BoundedPriorityQueue, true), mapobjects(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._1, _2, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._2), input[0, [Lscala.Tuple2;, true], None) AS value#85, ArrayType(StructType(StructField(_1,IntegerType,false), StructField(_2,FloatType,false)),true), true, 0, 0))
- element of array (index: 0)
- array (class [Ljava.lang.Object;, size 1)
- field (class: scala.collection.mutable.ArrayBuffer, name: array, type: class [Ljava.lang.Object;)
- object (class scala.collection.mutable.ArrayBuffer, ArrayBuffer(partial_topbykeyaggregator(org.apache.spark.ml.recommendation.TopByKeyAggregator@7e69bf78, Some(newInstance(class scala.Tuple3)), Some(class scala.Tuple3), Some(StructType(StructField(_1,IntegerType,false), StructField(_2,IntegerType,false), StructField(_3,FloatType,false))), encodeusingserializer(input[0, java.lang.Object, true], true) AS value#86, decodeusingserializer(input[0, binary, true], org.apache.spark.util.BoundedPriorityQueue, true), mapobjects(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._1, _2, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._2), input[0, [Lscala.Tuple2;, true], None) AS value#85, ArrayType(StructType(StructField(_1,IntegerType,false), StructField(_2,FloatType,false)),true), true, 0, 0)))
- field (class: org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec, name: aggregateExpressions, type: interface scala.collection.Seq)
- object (class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec, ObjectHashAggregate(keys=[value#84], functions=[partial_topbykeyaggregator(org.apache.spark.ml.recommendation.TopByKeyAggregator@7e69bf78, Some(newInstance(class scala.Tuple3)), Some(class scala.Tuple3), Some(StructType(StructField(_1,IntegerType,false), StructField(_2,IntegerType,false), StructField(_3,FloatType,false))), encodeusingserializer(input[0, java.lang.Object, true], true) AS value#86, decodeusingserializer(input[0, binary, true], org.apache.spark.util.BoundedPriorityQueue, true), mapobjects(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._1, _2, assertnotnull(lambdavariable(MapObjects_loopValue26, MapObjects_loopIsNull26, ObjectType(class scala.Tuple2), true))._2), input[0, [Lscala.Tuple2;, true], None) AS value#85, ArrayType(StructType(StructField(_1,IntegerType,false), StructField(_2,FloatType,false)),true), true, 0, 0)], output=[value#84, buf#120])
+- AppendColumnsWithObject <function1>, [assertnotnull(input[0, scala.Tuple3, true])._1 AS _1#76, assertnotnull(input[0, scala.Tuple3, true])._2 AS _2#77, assertnotnull(input[0, scala.Tuple3, true])._3 AS _3#78], [input[0, int, false] AS value#84]
+- MapPartitions <function1>, obj#75: scala.Tuple3
+- DeserializeToObject newInstance(class scala.Tuple2), obj#74: scala.Tuple2
+- CartesianProduct
:- *(2) SerializeFromObject [mapobjects(MapObjects_loopValue5, MapObjects_loopIsNull5, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue5, MapObjects_loopIsNull5, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue5, MapObjects_loopIsNull5, ObjectType(class scala.Tuple2), true))._1, _2, staticinvoke(class org.apache.spark.sql.catalyst.expressions.UnsafeArrayData, ArrayType(FloatType,false), fromPrimitiveArray, assertnotnull(lambdavariable(MapObjects_loopValue5, MapObjects_loopIsNull5, ObjectType(class scala.Tuple2), true))._2, true, false)), input[0, scala.collection.Seq, true], None) AS value#58]
: +- MapPartitions <function1>, obj#57: scala.collection.Seq
: +- DeserializeToObject newInstance(class scala.Tuple2), obj#56: scala.Tuple2
: +- *(1) Project [_1#35 AS id#38, _2#36 AS features#39]
: +- *(1) SerializeFromObject [assertnotnull(input[0, scala.Tuple2, true])._1 AS _1#35, staticinvoke(class org.apache.spark.sql.catalyst.expressions.UnsafeArrayData, ArrayType(FloatType,false), fromPrimitiveArray, assertnotnull(input[0, scala.Tuple2, true])._2, true, false) AS _2#36]
: +- Scan ExternalRDDScan[obj#34]
+- *(4) SerializeFromObject [mapobjects(MapObjects_loopValue8, MapObjects_loopIsNull8, ObjectType(class scala.Tuple2), if (isnull(lambdavariable(MapObjects_loopValue8, MapObjects_loopIsNull8, ObjectType(class scala.Tuple2), true))) null else named_struct(_1, assertnotnull(lambdavariable(MapObjects_loopValue8, MapObjects_loopIsNull8, ObjectType(class scala.Tuple2), true))._1, _2, staticinvoke(class org.apache.spark.sql.catalyst.expressions.UnsafeArrayData, ArrayType(FloatType,false), fromPrimitiveArray, assertnotnull(lambdavariable(MapObjects_loopValue8, MapObjects_loopIsNull8, ObjectType(class scala.Tuple2), true))._2, true, false)), input[0, scala.collection.Seq, true], None) AS value#65]
+- MapPartitions <function1>, obj#64: scala.collection.Seq
+- DeserializeToObject newInstance(class scala.Tuple2), obj#63: scala.Tuple2
+- *(3) Project [_1#45 AS id#48, _2#46 AS features#49]
+- *(3) SerializeFromObject [assertnotnull(input[0, scala.Tuple2, true])._1 AS _1#45, staticinvoke(class org.apache.spark.sql.catalyst.expressions.UnsafeArrayData, ArrayType(FloatType,false), fromPrimitiveArray, assertnotnull(input[0, scala.Tuple2, true])._2, true, false) AS _2#46]
+- Scan ExternalRDDScan[obj#44]
)
- field (class: org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1, name: $outer, type: class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec)
- object (class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1, <function0>)
- field (class: org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1$$anonfun$2, name: $outer, type: class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1)
- object (class org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec$$anonfun$doExecute$1$$anonfun$2, <function2>)
- field (class: org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1, name: f$22, type: interface scala.Function2)
- object (class org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1, <function0>)
- field (class: org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1$$anonfun$apply$24, name: $outer, type: class org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1)
- object (class org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndexInternal$1$$anonfun$apply$24, <function3>)
- field (class: org.apache.spark.rdd.MapPartitionsRDD, name: f, type: interface scala.Function3)
- object (class org.apache.spark.rdd.MapPartitionsRDD, MapPartitionsRDD[273] at show at cs.scala:206)
- field (class: org.apache.spark.NarrowDependency, name: _rdd, type: class org.apache.spark.rdd.RDD)
- object (class org.apache.spark.OneToOneDependency, org.apache.spark.OneToOneDependency@ed55d3e)
- writeObject data (class: scala.collection.immutable.List$SerializationProxy)
- object (class scala.collection.immutable.List$SerializationProxy, scala.collection.immutable.List$SerializationProxy@48a035e5)
- writeReplace data (class: scala.collection.immutable.List$SerializationProxy)
- object (class scala.collection.immutable.$colon$colon, List(org.apache.spark.OneToOneDependency@ed55d3e))
- field (class: org.apache.spark.rdd.RDD, name: org$apache$spark$rdd$RDD$$dependencies_, type: interface scala.collection.Seq)
- object (class org.apache.spark.rdd.MapPartitionsRDD, MapPartitionsRDD[274] at show at cs.scala:206)
- field (class: scala.Tuple2, name: _1, type: class java.lang.Object)
- object (class scala.Tuple2, (MapPartitionsRDD[274] at show at cs.scala:206,org.apache.spark.ShuffleDependency@52f57176))
at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:46)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100)
at org.apache.spark.scheduler.DAGScheduler.submitMissingTasks(DAGScheduler.scala:1031)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:947)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$submitStage$4.apply(DAGScheduler.scala:950)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$submitStage$4.apply(DAGScheduler.scala:949)
at scala.collection.immutable.List.foreach(List.scala:383)
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:949)
at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:891)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1777)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
网上百度到的资料多是讲:在 worker 端闭包(map、foreach 等)中使用了 driver(master)端的不可序列化对象所致,需要将该对象广播,或者让不可序列化的类 extends Serializable。但这显然不符合我的实际情况;排查了一天仍未解决。
解决办法:
最后想到可能是我的 project 本身有些问题,尝试 IDEA 的 Invalidate Caches / Restart,无效;
新建 project,将 IDEA 自带的 Scala 2.11.0 SDK 换成我自己下载的 Scala 2.11.12 后,把代码复制到新 project 再次运行 alsModel.recommendForAllUsers(2).show,一切正常!
推测根本原因:Scala 2.11.0 早期版本的 TypeTag 序列化存在缺陷(报错栈中正是 SerializedTypeTag / PredefTypeCreator 无法序列化),且与 Spark 编译所用的 Scala 小版本不一致;升级到 2.11.12 与 Spark 依赖的 Scala 版本对齐后问题消失。