from pyspark import SparkContext
from pyspark import SparkConf
# Minimal PySpark job: read a text file from HDFS and pull its lines back to
# the driver.
string_test = 'pyspark_test'  # application name shown by the cluster manager

# Run against YARN under the application name above.
conf = SparkConf().setAppName(string_test).setMaster('yarn')
sc = SparkContext(conf=conf)
try:
    # The host:port part of the URI (master:9000) must match the
    # fs.defaultFS setting in Hadoop's core-site.xml.
    hdfs_data = sc.textFile("hdfs://master:9000/data/hive/warehouse/initial_data.db/appstart/appstart_copy_96")
    # collect() materialises every line on the driver; keep the result so the
    # work is not thrown away. Only suitable for small inputs.
    collected_lines = hdfs_data.collect()
finally:
    # Always release the context (and the YARN application behind it), even
    # when reading the file fails.
    sc.stop()
# Note: master:9000 is the value of the fs.defaultFS setting in core-site.xml.