#-*- coding:utf-8 -*-
#!/usr/bin/env python
import sys
def run_mapper(stdin=None, stdout=None):
    """Word-count mapper: emit one tab-separated ``<word> 1`` record per word.

    Each input line is split on runs of whitespace and every resulting
    word is written on its own line as the word, a tab, and the literal
    count 1.

    Args:
        stdin: Iterable of text lines to read. Defaults to ``sys.stdin``.
        stdout: Object with a ``write`` method that receives the records.
            Defaults to ``sys.stdout``.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    for line in stdin:
        # split() without arguments already discards leading/trailing
        # whitespace, so no separate strip() is needed; blank lines simply
        # yield no words.
        for word in line.split():
            # write() instead of the print statement so the script runs
            # unchanged on both Python 2 and Python 3.
            stdout.write('%s\t%s\n' % (word, 1))


if __name__ == '__main__':
    run_mapper()
#-*- coding:utf-8 -*-
#!/usr/bin/env python
import sys
def run_reducer(stdin=None, stdout=None, target='Hadoop'):
    """Word-count reducer: sum the counts of one word and print the total.

    Reads tab-separated ``<word> <count>`` records (the mapper's output),
    adds up the counts of the records whose word equals ``target`` and
    writes a single ``<target> <total>`` record. Nothing is written when
    no valid record for ``target`` was seen.

    Args:
        stdin: Iterable of text lines to read. Defaults to ``sys.stdin``.
        stdout: Object with a ``write`` method that receives the result.
            Defaults to ``sys.stdout``.
        target: The only word whose total is reported.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    # Stays None until the first valid record for `target`, so "never seen"
    # can be told apart from a genuine total of 0.
    total = None
    for line in stdin:
        try:
            # Both the unpacking (line without a tab, e.g. a blank line)
            # and int() (non-numeric count) raise ValueError; such
            # malformed records are skipped instead of aborting the job.
            word, count = line.strip().split('\t', 1)
            count = int(count)
        except ValueError:
            continue
        if word == target:
            total = count if total is None else total + count
    if total is not None:
        # write() instead of the print statement so the script runs
        # unchanged on both Python 2 and Python 3.
        stdout.write('%s\t%s\n' % (target, total))


if __name__ == '__main__':
    run_reducer()