# Bootstrap for running this script with a plain Python interpreter:
# point the process at a Spark installation and make its Python bindings
# importable before pyspark is imported.
import os
import sys
# Hard-coded Spark installation directory; adjust to the local install.
os.environ['SPARK_HOME'] = '/opt/spark'
# Must run before the pyspark imports below — presumably this directory is
# where the installation ships the pyspark package (verify for your install).
sys.path.append("/opt/spark/python")
from pyspark import SparkContext
# NOTE(review): SparkConf is not used in the visible part of this script.
from pyspark import SparkConf
# Word count job: connect to the Spark master at spark://node0:7077 and
# register this application under the name "WordCount".
sc = SparkContext("spark://node0:7077", "WordCount")
try:
    # Load the input file as an RDD of lines.
    text_file = sc.textFile("/user/root/README.md")
    # Split every line on single spaces, pair each token with 1, then sum the
    # ones per token. NOTE: split(" ") yields empty-string tokens for
    # consecutive spaces, so "" may appear as a counted "word".
    counts = (
        text_file.flatMap(lambda line: line.split(" "))
        .map(lambda word: (word, 1))
        .reduceByKey(lambda a, b: a + b)
    )
    # collect() brings every (word, count) pair back to the driver.
    # The parenthesised single-argument print works on Python 2 and Python 3
    # alike; the bare `print x` statement is a SyntaxError on Python 3.
    print(counts.collect())
finally:
    # Shut the context down even if the job fails, so the application does
    # not keep holding its connection to the cluster.
    sc.stop()
# Spark 2.2.0 word count, Python version
# (Source page footer: "Latest recommended article published 2022-05-11 21:02:41")