# hadoop streaming -D stream.non.zero.exit.is.failure=false ...
#!/bin/sh
# Distributed grep via Hadoop Streaming.
#
# Removes the fixed output directory, then submits a streaming job whose
# mapper is `egrep '<pattern>'` over the given input path. Both hadoop
# command lines are echoed to stdout before they are run.
#
# Arguments:
#   $1  path handed to the job's -input option
#   $2  extended regular expression handed to egrep; it is embedded between
#       single quotes in the mapper command, so a pattern that itself
#       contains a single quote will break that quoting
#
# Exit status:
#   2  fewer than two arguments were given (nothing is deleted or submitted)
#   0  otherwise; the status of the hadoop commands is not propagated

STREAMING=/home/work/software/hadoop/contrib/streaming/hadoop-streaming.jar
HADOOP=/home/work/software/hadoop/bin/hadoop

# Validate before touching anything: without this check a bare invocation
# would still delete the output directory and submit a job with an empty
# -input value.
if [ "$#" -lt 2 ]; then
  printf 'usage: sh %s to_grep_path grep_str\n' "$0" >&2
  exit 2
fi

input_path=$1
pattern=$2
output_path=/home/user1/tmp
job_name=grep
mapper="egrep '$pattern'"

# Clear the previous result; the job is submitted regardless of whether
# this removal succeeds.
printf '%s\n' "$HADOOP fs -rmr $output_path"
"$HADOOP" fs -rmr "$output_path"

# Log the command exactly as it is about to be run.
printf '%s\n' "$HADOOP jar $STREAMING -D mapred.reduce.tasks=1 -D mapred.job.priority=VERY_HIGH -D mapred.job.name='$job_name' -D stream.non.zero.exit.is.failure=false -D mapred.max.split.size=800000000 -input $input_path -output $output_path -mapper \"$mapper\""

# stream.non.zero.exit.is.failure=false: egrep exits non-zero when it sees no
# matching line, which must not be treated as a failed map task.
"$HADOOP" jar "$STREAMING" \
  -D mapred.reduce.tasks=1 \
  -D mapred.job.priority=VERY_HIGH \
  -D mapred.job.name="$job_name" \
  -D stream.non.zero.exit.is.failure=false \
  -D mapred.max.split.size=800000000 \
  -input "$input_path" \
  -output "$output_path" \
  -mapper "$mapper"

# Exit 0 unconditionally; this also ends the script here, so nothing after
# this line is ever executed.
exit 0
# Usage:
#   sh grep.sh to_grep_path grep_str