Py4JJavaError Traceback (most recent call last) in <module>() ----> 1 df.select('counts').rdd.foreach(lambda x: do_something(x))
/usr/hdp/2.5.3.0-37/spark/python/pyspark/rdd.py in foreach(self, f) 745 f(x) 746 return iter([]) -> 747 self.mapPartitions(processPartition).count()  # Force evaluation 748 749 def foreachPartition(self, f):
/usr/hdp/2.5.3.0-37/spark/python/pyspark/rdd.py in count(self) 1002 3 1003 """ -> 1004 return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum() 1005 1006 def stats(self):
/usr/hdp/2.5.3.0-37/spark/python/pyspark/rdd.py in sum(self) 993 6.0 994 """ -> 995 return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add) 996 997 def count(self):
/usr/hdp/2.5.3.0-37/spark/python/pyspark/rdd.py in fold(self, zeroValue, op) 867 # zeroValue provided to each partition is unique from the one provided 868 # to the final reduce call --> 869 vals = self.mapPartitions(func).collect() 870 return reduce(op, vals, zeroValue) 871
/usr/hdp/2.5.3.0-37/spark/python/pyspark/rdd.py in collect(self) 769 """ 770 with SCCallSiteSync(self.context) as css: -> 771 port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd()) 772 return list(_load_from_socket(port, self._jrdd_deserializer)) 773
/usr/hdp/2.5.3.0-37/spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py in __call__(self, *args) 811 answer = self.gateway_client.send_command(command) 812 return_value = get_return_value( -> 813 answer, self.gateway_client, self.target_id, self.name) 814 815 for temp_arg in temp_args:
/usr/hdp/2.5.3.0-37/spark/python/pyspark/sql/utils.py in deco(*a, **kw) 43 def deco(*a, **kw): 44 try: -> 45 return f(*a, **kw) 46 except py4j.protocol.Py4JJavaError as e: 47 s = e.java_exception.toString()
/usr/hdp/2.5.3.0-37/spark/python/lib/py4j-0.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 306 raise Py4JJavaError( 307 "An error occurred while calling {0}{1}{2}.\n". -> 308 format(target_id, ".", name), value) 309 else: 310 raise Py4JError(