下面分别贴出 Python、Scala、Java 版本的 WordCount 程序:
python版:
import logging
from operator import add
from pyspark import SparkContext
# Configure root logging to emit only the bare message text, at INFO level and above.
logging.basicConfig(format='%(message)s', level=logging.INFO)
# Input: a file on the local filesystem, addressed with the file:// URI scheme.
test_file_name = "file:///home/yq/worldcount.py"
# Output directory URI (its use lies outside this excerpt).
# Do not create the spark-out directory beforehand; it is generated automatically.
out_file_name = "file:///home/yq/spark-out"
# Create the Spark context: master is "local", application name is "wordcount app".
sc = SparkContext("local","wordcount app")
# Load the input file as an RDD (one element per line of text).
text_file = sc.textFile(test_file_name)
# counts
counts = text_file.flatMap(lambda line: line.split(" ")).map(lambda w