from pyspark import SparkContext,SparkConf
import os
# Point PySpark's worker launcher at a specific interpreter. This must be set
# BEFORE the SparkContext is created, or workers pick up the default python.
# NOTE(review): per the error report below, PySpark 3.5.2/3.5.3 crashed its
# Python workers under Python 3.12 even with this set; downgrading the
# interpreter to Python 3.10 resolved it — confirm the installed PySpark
# version supports the interpreter you point at here.
os.environ["PYSPARK_PYTHON"] = "C:/python312/python.exe"
# os.environ['SPARK_HOME'] = "C:/python312/python.exe"
# Run Spark locally with as many worker threads as logical cores.
conf = SparkConf().setMaster("local[*]").setAppName("test")
# Also pin the executor-side interpreter (belt-and-suspenders with the env var above).
conf.set("spark.executorEnv.PYSPARK_PYTHON","C:/python312/python.exe")
sc = SparkContext(conf=conf)
# Silence Spark's console logging for a cleaner demo output.
sc.setLogLevel("OFF")
# Distribute a small in-memory list as an RDD and pull it back to the driver.
rdd = sc.parallelize([1,2,3,4,5])
print(rdd.collect())
def add(v):
    """Return *v* incremented by 1.

    Used as the mapper function for ``rdd.map`` below.

    Args:
        v: A numeric RDD element.

    Returns:
        ``v + 1``.
    """
    # BUGFIX: the original `return` was at column 0, which raises
    # IndentationError ("expected an indented block") — the body must be
    # indented under the `def`.
    return v + 1
# Lazily build a new RDD by applying `add` to each element; the work only
# actually runs on the workers when an action (collect) is triggered.
rdd1 = rdd.map(add)
# collect() ships the mapped partitions back to the driver — this is the call
# that fails in the traceback below when the worker interpreter crashes.
print(rdd1.collect())
# Always shut the context down so the local Spark JVM exits cleanly.
sc.stop()
报错信息如下:
Traceback (most recent call last):
File "D:\workspace\py\echartTest\echartTest\pysparkpy.py", line 14, in <module>
print(rdd1.collect())
^^^^^^^^^^^^^^
File "C:\Python312\Lib\site-packages\pyspark\rdd.py", line 1833, in collect
sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python312\Lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
return_value = get_return_value(
^^^^^^^^^^^^^^^^^
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 1.0 failed 1 times, most recent failure: Lost task 2.0 in stage 1.0 (TID 6) (pp executor driver): org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:612)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:594)
at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)
at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:789)
学习 Python 时遇到了这个问题:最初使用 Python 3.12 搭配 PySpark 3.5.2 或 3.5.3 都会报上述错误,即使指定了 PYSPARK_PYTHON 以及 JAVA_HOME 等环境变量也没有生效;后来将 Python 降级到 3.10 版本,问题解决。