#!/bin/bash
# Upload a local file to HDFS and run a Hadoop Streaming job that counts
# its lines: each mapper emits the line count of its split (`wc -l`), and
# the count.sh reducer sums those partial counts.
#
# Usage: ./<this-script> <file_name>
#   - count.sh must be present in the current working directory.
#   - HADOOP_HOME must point at a Hadoop 1.x installation.
set -u -o pipefail

file_name=${1:?usage: $0 <file_name>}

# Remove a previous result directory; the job fails if the output dir exists.
# "|| true": ignore the error when the directory is not there yet.
hadoop fs -rm -r /bigdata/output/test/ || true
# Remove a previously uploaded copy of the input file, if any.
hadoop fs -rm "/bigdata/input/test/$file_name" || true
# Upload the local file to HDFS.
hadoop fs -put "$file_name" /bigdata/input/test/
# Count lines via streaming: mapper prints per-split counts, reducer sums them.
hadoop jar "$HADOOP_HOME/contrib/streaming/hadoop-streaming-1.0.1.jar" \
  -files count.sh \
  -input "/bigdata/input/test/$file_name" \
  -output /bigdata/output/test/ \
  -mapper 'wc -l' \
  -reducer "sh count.sh"
count.sh（reducer 脚本）的内容如下：
#!/bin/bash
# count.sh — streaming reducer: every input line is a partial line count
# produced by a `wc -l` mapper; sum them and print the grand total to stdout.
count=0
# IFS= and -r keep each value verbatim (no backslash interpretation or
# whitespace trimming); `|| [ -n "$LINE" ]` still processes a final line
# that lacks a trailing newline, which plain `read` would drop.
while IFS= read -r LINE || [ -n "$LINE" ]; do
  count=$((count + LINE))
done
echo "$count"
运行成功后在hdfs目录下会产生结果文件
hadoop fs -cat /bigdata/output/test/part-00000