# Use PyCharm to implement Spark SQL.
from pyspark import Row
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StringType, StructType
if __name__ == "__main__":
    # Build a local SparkSession -- the entry point for DataFrame/SQL work.
    spark = SparkSession \
        .builder \
        .appName("app name") \
        .master("local") \
        .getOrCreate()
    sc = spark.sparkContext

    # Read the '|'-delimited text file into an RDD of field lists
    # (each element is [id, name, age] as strings).
    line = sc.textFile("D:\\data\\demo.txt").map(lambda x: x.split('|'))

    # Build the schema programmatically from a space-separated column list.
    # All columns are typed as strings here; cast downstream if numeric
    # types are needed (e.g. age).
    schemaString = "id name age"
    fields = [StructField(field_name, StringType(), nullable=True)
              for field_name in schemaString.split(" ")]
    schema = StructType(fields)
rowRDD = line.map(lambda attributes: Row(attributes[