#!/bin/bash
#
# Launches a Hadoop Streaming job that runs mapper1.py / reducer1.py over two
# HDFS input paths and writes the result to OUTPUT_PATH. OUTPUT_PATH is removed
# first so the job does not start against an existing output directory.
#
# mapper1.py and reducer1.py are referenced by relative path, so run this
# script from the directory that contains them.

set -euo pipefail

readonly HADOOP_HOME=/usr/local/webserver/hadoop
readonly INPUT_PATH=/data/archive/app_oeudjgn5872a7c3aaa54_datamine/george/inputs
readonly INPUT_PATH2=/user/resys/projects/image/koudai_img/d_img/reduced/2015-taobao
readonly OUTPUT_PATH=/data/archive/app_oeudjgn5872a7c3aaa54_datamine/george/output/may30
# Alternative output location kept from the original script:
#OUTPUT_PATH=output

readonly HADOOP_BIN="${HADOOP_HOME}/bin/hadoop"
readonly STREAMING_JAR="${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-streaming-2.6.0.jar"

#echo "Clearing output path: $OUTPUT_PATH"
# Best-effort cleanup: `-rm -r` replaces the deprecated `-rmr`, and `-f` keeps
# a missing output directory from being reported as an error. Any other
# failure is only warned about, so the job submission below still happens.
"${HADOOP_BIN}" fs -rm -r -f "${OUTPUT_PATH}" \
  || printf 'warning: could not remove output path %s\n' "${OUTPUT_PATH}" >&2

# The arguments live in an array so each one stays a separate word. The
# original used trailing backslashes with no surrounding whitespace, which
# makes the shell glue neighbouring tokens together (e.g. "jar" + jar path,
# "mapper1.py" + "-file").
streaming_args=(
  -file mapper1.py
  -file reducer1.py
  -mapper mapper1.py
  -reducer reducer1.py
  -input "${INPUT_PATH}"
  -input "${INPUT_PATH2}"
  -output "${OUTPUT_PATH}"
  -jobconf mapreduce.jobtracker.split.metainfo.maxsize=-1
  -jobconf mapred.job.queue.name="offline"
  -jobconf mapred.job.priority="NORMAL"
  -jobconf mapred.reduce.tasks="10"
  # NOTE(review): trailing positional argument carried over from the original
  # script; its purpose is not evident here (possibly an intended job name).
  # Confirm, then remove it or replace it with an explicit option.
  word_count
)

"${HADOOP_BIN}" jar "${STREAMING_JAR}" "${streaming_args[@]}"
# (Web-page footer captured along with the script; not a command: "Latest recommended article published on 2021-03-26 23:02:50")