1. SparkSession:
# DataFrame reader: treats the first line as a header, infers column types, decodes GBK-encoded text
training = spark.read.format("csv").options(header='true', inferSchema='true', encoding='gbk').load("hdfs://localhost:9000/taobao/dataset/train.csv")
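Both snippets assume a SparkSession named spark already exists. A minimal sketch of creating one (the app name here is illustrative, not from the original):

from pyspark.sql import SparkSession

# Sketch: create the SparkSession that both loading approaches rely on
spark = SparkSession.builder.appName("taobao-train").getOrCreate()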
2. SparkContext:
from pyspark.sql import Row
# Load the data, wrap each record as a Row object, and convert it to a DataFrame;
# the first column holds the feature and the second the label
training = spark.sparkContext.textFile("hdfs://localhost:9000/taobao/dataset/train.csv").map(lambda line: line.split(',')).map(lambda p: Row(**splitDF(p))).toDF()
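The splitDF helper is referenced here but never defined. A minimal sketch of what it might look like, assuming a two-column CSV as the comment describes (the function body and field names are illustrative, not from the original):

# Hypothetical splitDF: maps a parsed line p (a list of strings) to a dict
# whose keys become the Row fields and hence the DataFrame column names.
# Assumes column 0 is the feature and column 1 the label, both numeric.
def splitDF(p):
    return {"features": float(p[0]), "label": float(p[1])}

Note that unlike the DataFrame reader above, textFile does not skip a header row; if train.csv starts with one, filter it out before parsing.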