# Mapper program: split every input line into words and emit "<word>\t1".
import sys


def read_input(file):
    """Yield the list of whitespace-separated words for each line of *file*.

    Args:
        file: Any iterable of text lines, e.g. ``sys.stdin``.

    Yields:
        One list of words per input line. A blank line yields an empty list.
    """
    for line in file:
        # split() with no argument splits on any run of whitespace, which
        # also discards the trailing newline.
        yield line.split()


def main():
    """Read text from stdin and print one ``word<TAB>1`` record per word."""
    data = read_input(sys.stdin)
    for words in data:
        for word in words:
            print("%s%s%d" % (word, '\t', 1))


if __name__ == "__main__":
    main()
分割单词,以如下形式输出(单词与计数之间以制表符分隔):
a 1
b 1
c 1
a 1
# Reducer program: count word frequencies by summing the mapper's counts.
import sys
from itertools import groupby
from operator import itemgetter


def read_mapper_output(file, separator='\t'):
    """Yield ``[word, count]`` records parsed from mapper output lines.

    Args:
        file: Any iterable of text lines, e.g. ``sys.stdin``.
        separator: String between the word and its count on each line.

    Yields:
        The line with trailing whitespace removed, split at the first
        *separator*. Lines that are empty after stripping are skipped.
        A non-empty line without *separator* yields a one-element list.
    """
    for line in file:
        record = line.rstrip()
        if not record:
            # A blank line carries no record; yielding [''] would make the
            # (word, count) unpacking in main() fail.
            continue
        yield record.split(separator, 1)


def main():
    """Sum the counts of each word read from stdin and print the totals.

    Prints one ``word<TAB>total`` line per run of consecutive identical
    words, i.e. the same record format the mapper emits.

    Raises:
        ValueError: If a record has no separator or its count is not an
            integer.
    """
    data = read_mapper_output(sys.stdin)
    # groupby only merges *adjacent* records with the same key, so the input
    # must already be sorted by word (e.g. piped through `sort`).
    for current_word, group in groupby(data, itemgetter(0)):
        total_count = sum(int(count) for _, count in group)
        # Plain tab between word and total: no stray spaces around it, so
        # the key has no trailing space.
        print("%s\t%d" % (current_word, total_count))


if __name__ == '__main__':
    main()
本地运行测试