1.编写mapper函数
mapper.py
import sys
def map_words(lines):
    """Yield one ``word<TAB>1`` record for every word found in *lines*.

    Args:
        lines: Any iterable of text lines (for example ``sys.stdin``).

    Yields:
        A string of the form ``"<word>\\t1"`` for each whitespace-delimited
        word, in input order. Blank lines yield nothing.
    """
    for line in lines:
        # split() with no argument splits on any run of whitespace and never
        # returns empty strings, so blank lines and repeated spaces do not
        # produce records with an empty key, and a tab inside the input
        # cannot leak into the key and corrupt the tab-delimited output.
        for word in line.split():
            yield '\t'.join((word, '1'))


def main():
    """Read text from stdin and print a ``word<TAB>1`` record per word."""
    for record in map_words(sys.stdin):
        # A single parenthesised argument prints the same way under both the
        # Python 2 print statement and the Python 3 print function.
        print(record)


if __name__ == '__main__':
    main()
2.编写reducer函数
import sys
cur_word = ''
cur_count = 0
for line in sys.stdin:
wdline = line.strip().split('\t')
if len(wdline) != 2:
continue