map 函数如下:
import sys
# Mapper step of a map/reduce word count: every whitespace-separated token
# read from standard input is written to standard output as a tab-separated
# "<word><TAB>1" record.


def map_words(lines):
    """Yield one "<word><TAB>1" record for every token found in *lines*.

    Args:
        lines: Any iterable of text lines (for example ``sys.stdin``).

    Yields:
        One string per token, made of the token, a tab character and the
        literal count ``1``, in the order the tokens appear in the input.
        Blank or whitespace-only lines produce no records.
    """
    for line in lines:
        # split() with no argument already ignores leading/trailing
        # whitespace (including the newline), so no separate strip() is needed.
        for word in line.split():
            # The word is the key. Per the original author's note, the
            # shuffle step that follows sorts the records by this key.
            yield '%s\t%s' % (word, 1)


def main():
    """Read text from standard input and print one record per token."""
    for record in map_words(sys.stdin):
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3, unlike the Python 2-only print statement.
        print(record)


if __name__ == '__main__':
    main()
reducer 函数如下。注意:这里必须将 reduce 任务数设置为 1(即只运行一个 reduce 任务)。
#!/usr/bin/env python
from operator import itemgetter
import sys
current_word = None
current_count = 0
word = None
sum_count=0
record_word=[]
record_count=[]
for line in sys.stdin:
line = line.strip()
word, count = line.split('\t', 1)
sum_count=sum_count+1
try:
count = int(count)
except ValueError:
continue
if