# hdfs_map.py
"""Hadoop-streaming mapper: emit one "<word>\t1" record per word on stdin."""
import sys


def read_input(file):
    """Yield the whitespace-split tokens of each line in *file*."""
    for line in file:
        yield line.split()


def main():
    """Read lines from stdin and print a "word\t1" record for every token.

    Each occurrence is emitted with a count of 1; the reducer sums the
    counts per word after Hadoop's shuffle/sort phase.
    """
    data = read_input(sys.stdin)
    for words in data:
        for word in words:
            # Original (mangled) source printed "'\t, l" — the intended
            # literal is the separator '\t' and the count 1.
            print("%s%s%d" % (word, '\t', 1))
    # Sample output for input "a b a":
    # a 1
    # b 1
    # a 1


if __name__ == '__main__':
    main()
# --- reducer (hdfs_reduce.py) ---
# hdfs_reduce.py
"""Hadoop-streaming reducer: sum per-word counts emitted by the mapper."""
import sys
from operator import itemgetter
from itertools import groupby


def read_mapper_output(file, separator='\t'):
    """Yield each line of *file* as a [key, value] pair.

    The line is right-stripped and split on *separator* with maxsplit=1,
    so any separator characters inside the value are preserved.
    """
    for line in file:
        yield line.rstrip().split(separator, 1)


def main():
    """Aggregate "word\tcount" records from stdin and print "word\ttotal".

    groupby only merges *adjacent* equal keys, so this relies on the
    Hadoop shuffle/sort phase having sorted the mapper output by key.
    """
    data = read_mapper_output(sys.stdin)
    for current_word, group in groupby(data, itemgetter(0)):
        # Use a distinct name for the key inside the generator; the
        # original shadowed current_word.
        total_count = sum(int(count) for _word, count in group)
        print("%s%s%d" % (current_word, '\t', total_count))


if __name__ == "__main__":
    main()
# NOTE: the original file ended with a "map:" label followed by a truncated
# duplicate listing of hdfs_map.py (it broke off mid-definition with "...").
# See the full mapper script at the top of this file.