# Install findspark so a local Python interpreter can locate the Spark installation.
sudo pip install findspark
# Unpack PyCharm Community into /usr/local and launch it.
cd /usr/local
# FIX: tar requires -f to name the archive ("-zxy" is invalid — 'y' is not
# a tar option and without -f tar reads from the tape device); also fixed
# the "Dowmloads" -> "Downloads" typo in the archive path.
sudo tar -zxf ~/Downloads/pycharm-community-2022.3.1.tar.gz
cd pycharm-community-2022.3.1
bin/pycharm.sh
# Import the required Spark classes.
from pyspark import SparkContext
from pyspark.sql import SparkSession

# Build a SparkSession — the recommended entry point since Spark 2.0 —
# running locally on a single core.
spark = (
    SparkSession.builder
    .master("local[1]")
    .appName('SparkExample')
    .getOrCreate()
)

# The underlying SparkContext is exposed on the session.
sc = spark.sparkContext

# Distribute the numbers 1..4 from the driver as an RDD.
rdd = sc.parallelize(range(1, 5))

# Bring the RDD's elements back to the driver and print them.
print(rdd.collect())  # output: [1, 2, 3, 4]