- 设置参数
#!/bin/bash
#
# Submit the Hadoop Streaming job "qianjc_trans_record", which runs map.py and
# reduce.py over INPUT and writes the result to trans_record/result.
#
# Usage: <this script> INPUT CARD_START CARD_LAST TRANS_AT
#   INPUT       path handed to the streaming job as -input
#   CARD_START  passed to the tasks as environment variable card_start
#   CARD_LAST   passed to the tasks as environment variable card_last
#   TRANS_AT    passed to the tasks as environment variable trans_at
#
# map.py, reduce.py and the streaming jar are resolved relative to the
# current working directory.
set -euo pipefail

if [[ "$#" -ne 4 ]]; then
  printf 'Usage: %s INPUT CARD_START CARD_LAST TRANS_AT\n' "${0##*/}" >&2
  exit 2
fi

readonly OUTPUT_DIR='trans_record/result'

# Clear the result of a previous run. A failure here (for example, nothing to
# delete yet) is reported but not fatal, as the job is submitted regardless.
# `-rm -r` replaces the deprecated `-rmr`.
if ! hadoop fs -rm -r "${OUTPUT_DIR}"; then
  printf 'note: could not remove %s, continuing\n' "${OUTPUT_DIR}" >&2
fi

# Generic options (-D) must precede the streaming-specific options; they
# replace the deprecated -jobconf flags and carry the same properties.
hadoop jar ./hadoop-streaming-2.0.0-mr1-cdh4.7.0.jar \
  -D mapred.reduce.tasks=1 \
  -D mapred.job.name="qianjc_trans_record" \
  -input "$1" \
  -output "${OUTPUT_DIR}" \
  -file map.py \
  -file reduce.py \
  -mapper "python map.py" \
  -reducer "python reduce.py" \
  -cmdenv "card_start=$2" \
  -cmdenv "card_last=$3" \
  -cmdenv "trans_at=$4"
#!/usr/bin/env python
# vim: set fileencoding=utf-8
"""Streaming mapper that keeps the CSV records matching a card and an amount.

The filter values are read from the environment variables card_start,
card_last and trans_at. Records are read from stdin, one comma-separated
line each; matching records are written to stdout.
"""
import os
import sys


def main():
    """Filter stdin and emit every matching record.

    A record matches when all of the following hold:
      * field 17 (0-based), parsed as a float, equals trans_at;
      * characters 1..6 of field 0 equal card_start;
      * the last four characters of field 0 equal card_last.

    For each match the stripped input line, a tab, and field 1 are written
    to stdout followed by a newline.

    Lines that have fewer than 18 fields, or whose field 17 is not a number,
    are skipped; the number of skipped lines is reported on stderr.

    Exits with status 1 and a message on stderr when trans_at is missing or
    is not a number.
    """
    card_start = os.environ.get('card_start')
    card_last = os.environ.get('card_last')
    raw_amount = os.environ.get('trans_at')
    try:
        trans_at = float(raw_amount)
    except (TypeError, ValueError):
        # TypeError: variable not set (None); ValueError: not numeric.
        sys.exit("environment variable 'trans_at' must be a number, got %r"
                 % (raw_amount,))

    skipped = 0
    for line in sys.stdin:
        record = line.strip()
        detail = record.split(',')
        try:
            money = float(detail[17])
        except (IndexError, ValueError):
            # Blank, truncated or non-numeric line: do not let one bad
            # record abort the whole task.
            skipped += 1
            continue

        card = detail[0]
        # NOTE(review): the prefix slice starts at index 1, so the first
        # character of the card field is ignored - confirm this matches the
        # input format.
        if (trans_at == money
                and card_start == card[1:7]
                and card_last == card[-4:]):
            sys.stdout.write('%s\t%s\n' % (record, detail[1]))

    if skipped:
        sys.stderr.write('skipped %d malformed line(s)\n' % skipped)


if __name__ == '__main__':
    main()