SparkContext对象 (Example 1: creating a SparkContext object)
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pyspark import SparkConf, SparkContext

if __name__ == '__main__':
    # Build the Spark configuration: app name "test", run locally on all cores.
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    # Build the SparkContext — the entry-point object for the RDD API.
    sc = SparkContext(conf=conf)
    # Read the text file and build an RDD (one element per line).
    rdd = sc.textFile("./data/input/words.txt")
SparkSession对象 (Example 2: creating a SparkSession object)
#!/usr/bin/python
# -*- coding: utf-8 -*-
# SparkSession lives in the pyspark.sql package.
from pyspark.sql import SparkSession

if __name__ == '__main__':
    # Build the SparkSession — the unified entry-point object for the
    # DataFrame / SparkSQL API. Parenthesized chain avoids fragile
    # backslash line-continuations.
    spark = (
        SparkSession.builder
        .appName("test")
        .config("spark.sql.shuffle.partitions", "4")
        .getOrCreate()
    )
    # A SparkContext can be obtained from the SparkSession when the
    # lower-level RDD API is needed.
    sc = spark.sparkContext
    # SparkSQL "hello world": read a comma-separated file into a DataFrame
    # (header=False: the first line is data, not column names).
    df = spark.read.csv("./data/input/words.txt", sep=',', header=False)