import re
import sys
# Matches text enclosed in Chinese title marks (the movie names in the log).
# The group is non-greedy so several titles on one line are captured
# separately. Compiled once so the per-line loop reuses the same pattern.
# NOTE(review): the pattern contains non-ASCII characters, so running this
# file under Python 2 requires a PEP 263 source-encoding declaration at the
# very top of the file -- confirm one is present.
_TITLE_PATTERN = re.compile(r'《(.+?)》')


def main(lines=None, out=None):
    """Write one tab-separated ``title, 1`` record per movie title found.

    Every non-empty run of text enclosed in title marks on an input line
    produces one output line of the form ``<title><TAB>1``, i.e. a key and
    a count of one for a downstream reducer to aggregate.

    Args:
        lines: Iterable of text lines to scan. Defaults to ``sys.stdin``.
        out: Object with a ``write`` method receiving the records.
            Defaults to ``sys.stdout``.
    """
    if lines is None:
        lines = sys.stdin
    if out is None:
        out = sys.stdout
    for line in lines:
        for title in _TITLE_PATTERN.findall(line):
            # write() instead of the print statement: same output, but valid
            # syntax on both Python 2 and Python 3.
            out.write('%s\t%s\n' % (title, 1))


if __name__ == '__main__':
    main()
hadoop jar /data/bigdata/hadoop-2.7.3/share/hadoop/tools/lib/hadoop-streaming-2.7.3.jar -D stream.non.zero.exit.is.failure=false -input /input/movies_history.log -output /output2 -mapper 'python mapper.py' -reducer 'python reduce.py' -file ./mapper.py -file ./reduce.py
港囧