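# An optimized version of "word count, version 0" for counting one specific word.
# 1. Uses a class to implement the word count.
# 2. Accepts any input file and any word to be counted.
# 3. Outputs the total number of occurrences of that word.
# input: file path, word
# output: number of occurrences of the word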
# -*- coding:UTF-8 -*-
from pyspark import SparkContext
from pyspark import SparkConf
class Counter:
    """Count occurrences of a word in a text file using a local Spark context.

    Usage: construct an instance, call ``loadFile`` with a path, then call
    ``count`` with the word of interest. Call ``stop`` when finished to
    release the Spark context.
    """

    def __init__(self):
        """Create a SparkContext with master "local" and app name "words count"."""
        conf = SparkConf().setMaster("local").setAppName("words count")
        self.sc = SparkContext(conf=conf)

    def loadFile(self, filePath):
        """Read the text file at ``filePath`` into an RDD of lines.

        The RDD is stored on ``self.inputRDD`` and is what ``count`` reads.
        """
        self.inputRDD = self.sc.textFile(filePath)

    def count(self, word):
        """Return the total number of occurrences of ``word`` in the loaded file.

        Matching uses ``str.count`` on each line, so occurrences are
        non-overlapping substring matches (``"cat"`` is also counted inside
        ``"category"``). ``loadFile`` must have been called first.
        """
        # Count per line inside Spark and let the RDD add the numbers up,
        # instead of collecting every matching line to the driver. Lines
        # without the word contribute 0, so no separate filter step is needed.
        return self.inputRDD.map(lambda line: line.count(word)).sum()

    def stop(self):
        """Shut down the SparkContext created by this instance."""
        self.sc.stop()
def run(filePath, word):
    """Count how many times ``word`` occurs in the text file at ``filePath``.

    Creates a ``Counter`` (and with it a SparkContext), loads the file,
    and returns the count.
    """
    counterWord = Counter()
    try:
        counterWord.loadFile(filePath)
        return counterWord.count(word)
    finally:
        # Always release the context, even on failure; otherwise it leaks
        # and a later run() in the same process cannot create a new one.
        counterWord.sc.stop()
if __name__ == '__main__':
    # Interactive entry point: prompt for a file path and a word, then print
    # how many times the word occurs in that file.
    # raw_input exists only on Python 2; on Python 3 the equivalent is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    file_path = read_line('Please enter the file path:')
    word = read_line('What word do you want to count? :')
    print('The result is ' + str(run(file_path, word)))