Since MapReduce comes up frequently in my day-to-day work, this post assembles the MR Streaming template I use most often, so that future jobs can be set up faster.
MapReduce Work Notes series index: MapReduce Work Notes - Table of Contents
Template Overview
#!/bin/sh
# #-----------------------------------#
# _ _ _
# | | | | __ _ __| | ___ ___ _ __
# | |_| |/ _` |/ _` |/ _ \ / _ \| '_ \
# | _ | (_| | (_| | (_) | (_) | |_) |
# |_| |_|\__,_|\__,_|\___/ \___/| .__/
# |_|
# #-----------------------------------#
# Created on 2018.11.13
# Latest modified on 2018.11.13
# @author: wangcongying
# #-----------------------------------#
# MAPPER: ${CURDIR}/mapper.py
# REDUCER: ${CURDIR}/reducer.py
# #-----------------------------------#
if [ $# != 1 ] ; then
    echo "***********************"
    echo "Parameter error !!!"
    echo "USAGE: ./TestHadoopJob.sh "
    echo " e.g.:"
    echo "***********************"
    exit 1;
fi
# ${HADOOP_HOME}: Hadoop installation path
HADOOP_HOME=""
HDP="$HADOOP_HOME/bin/hadoop fs"
# ${MY_PATH}: directory of this script as invoked; ${CUR_DIR}: the same directory fully resolved (symlinks followed)
MY_PATH=$(dirname "$0")
CUR_DIR=$(dirname "$(readlink -f "$0")")
Today=$(date +%Y%m%d)
# ${INPUT}: HDFS input path
# ${OUTDIR}: HDFS output path
# ${JOB_NAME}: MR job name
INPUT=""
OUTDIR=""
JOB_NAME="wangcongying_${Today}"
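# Append a separator line to the local screen log before each run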
echo '===============================================' >> $MY_PATH/hadoop_screen.ans
# Remove any previous output directory; the job fails if ${OUTDIR} already exists
$HDP -rmr $OUTDIR
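# Submit the Streaming job. The generic -D options must come before the
# streaming-specific options (-mapper, -reducer, -input, -output, -file).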
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/contrib/streaming/hadoop-streaming.jar \
-D mapred.job.max.map.running=500 \
-D mapred.job.max.reduce.running=500 \
-D mapred.ignore.badcompress="true" \
-D mapred.use.multimembergzip="true" \
-D mapred.max.map.failures.percent=3 \
-D stream.num.map.output.key.fields=1 \
-D mapred.text.key.comparator.options="-k1,1" \
-D mapred.job.name=$JOB_NAME \
-D mapred.job.priority="VERY_HIGH" \
-jobconf mapred.reduce.tasks=100 \
-jobconf mapred.child.env="LANG=en_US.UTF-8,LC_ALL=en_US.UTF-8" \
-mapper "mapper.py" \
-reducer "reducer.py" \
-input ${INPUT} \
-output ${OUTDIR} \
-file "$MY_PATH/reducer.py" \
-file "$MY_PATH/mapper.py" \
2>&1 | tee -a $MY_PATH/hadoop_screen.ans
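# A successful job writes an empty _SUCCESS marker into ${OUTDIR}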
$HDP -test -e ${OUTDIR}/_SUCCESS
if [ $? -ne 0 ]; then
    echo "ERROR: Hadoop job ${JOB_NAME} failed, quit!"
    exit 1;
fi
exit 0;
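The template ships mapper.py and reducer.py next to the script via -file, but their contents are outside the scope of this post. As a minimal sketch only (assuming a word-count style job over tab-separated text, with the key in the first field), they could look like the following; the field parsing and output format are placeholders to replace with your own logic.

#!/usr/bin/env python
# mapper.py -- minimal sketch: emit "key<TAB>1" for the first field of every input line
import sys

for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    # Assumption: the key is the first tab-separated field of the input record
    key = line.split('\t')[0]
    sys.stdout.write('%s\t%d\n' % (key, 1))

#!/usr/bin/env python
# reducer.py -- minimal sketch: sum the counts per key
# Streaming delivers the mapper output sorted on the key, so identical keys arrive contiguously
import sys

current_key = None
count = 0
for line in sys.stdin:
    key, value = line.rstrip('\n').split('\t', 1)
    if key != current_key:
        if current_key is not None:
            sys.stdout.write('%s\t%d\n' % (current_key, count))
        current_key = key
        count = 0
    count += int(value)

if current_key is not None:
    sys.stdout.write('%s\t%d\n' % (current_key, count))

Both scripts need a valid shebang and execute permission (chmod +x mapper.py reducer.py); otherwise, change -mapper and -reducer in the template to "python mapper.py" and "python reducer.py".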