#!/usr/bin/python
#coding:utf8
import sys
def transform_line(line):
    """Rewrite one tab-separated record for the Hive TRANSFORM demo.

    Takes a raw input line (trailing newline allowed), applies the demo
    substitutions to the second column, and returns the first two columns
    joined by a tab.  Columns beyond the second are dropped, matching the
    original script.

    NOTE(review): the second .replace() also matches the "bian" inside the
    "biansutao" produced by the first, so "sutao" becomes "biansutaosutao".
    This chained behavior is preserved from the original on purpose —
    confirm against the blog post's intent before "fixing" it.

    Raises IndexError if the line has fewer than two tab-separated columns.
    """
    arr = line.strip('\n').split('\t')
    arr[1] = arr[1].replace("sutao", "biansutao").replace("bian", "biansutao")
    return '\t'.join([arr[0], arr[1]])


if __name__ == '__main__':
    # Hadoop-streaming style mapper: one tab-separated record per stdin line.
    # print(...) with a single argument is valid in both Python 2 and 3.
    for line in sys.stdin:
        print(transform_line(line))
# Example Hive usage for this script (kept verbatim from the original post):
# register the file with Hive, then run it over a table via TRANSFORM.
'''
add file /home/hadoop/demo.py
select transform(t.id,t.name) using '/usr/bin/python demo.py' as (a int,b string) from test t;
'''
# Second example: a full Hive streaming MAP / CLUSTER BY / REDUCE pipeline.
# mapper.py and reducer.py are separate scripts, not defined in this file.
'''
ADD FILE mapper.py;
ADD FILE reducer.py;
FROM (
FROM tweets_parsed
MAP tweets_parsed.time, tweets_parsed.id, tweets_parsed.tweet
USING 'python mapper.py'
AS word, count
CLUSTER BY word) map_output
INSERT OVERWRITE TABLE word_count
REDUCE map_output.word, map_output.count
USING 'python reducer.py'
AS word, count;
'''
# Blog footer (scrape artifact, kept as a comment so the file stays valid
# Python): posted 2012-03-09 18:05, 5077 views, comments section followed.