from pyspark import SparkContext


def main() -> None:
    """Count words in /data/test01.txt and print the five most frequent.

    Reads the text file as an RDD, splits each line on single spaces,
    drops empty tokens (produced by consecutive spaces), counts each
    word, and prints the top five (word, count) pairs by count.
    """
    sc = SparkContext()
    try:
        lines = sc.textFile("/data/test01.txt")
        top_words = (
            lines.flatMap(lambda line: line.split(" "))
            # consecutive spaces yield "" tokens; drop them before counting
            .filter(lambda word: word != "")
            .map(lambda word: (word, 1))
            .reduceByKey(lambda x, y: x + y)
            # keyword makes the sort direction explicit (highest count first)
            .sortBy(lambda pair: pair[1], ascending=False)
            .take(5)
        )
        print(top_words)
    finally:
        # always release the driver/cluster resources, even if the job fails
        sc.stop()


if __name__ == "__main__":
    main()
# pyspark example
# (blog metadata: latest recommended article published 2024-03-19 23:21:27)