[root@master mapreduce_wordcount_python]# ls
map.py red.py run.sh The_Man_of_Property.txt
[root@master mapreduce_wordcount_python]# cat run.sh
#!/usr/bin/env bash
# Submit the Hadoop streaming word-count job (map.py mapper, red.py reducer)
# over the input text already uploaded to HDFS.
set -u

readonly HADOOP_CMD="/usr/local/src/hadoop-2.6.1/bin/hadoop"
readonly STREAM_JAR_PATH="/usr/local/src/hadoop-2.6.1/share/hadoop/tools/lib/hadoop-streaming-2.6.1.jar"
readonly INPUT_FILE_PATH_1="/The_Man_of_Property.txt"
readonly OUTPUT_PATH="/output"

# Remove any previous output directory. 'fs -rmr' is deprecated; use 'fs -rm -r'.
# '|| true' keeps the first run going when /output does not exist yet.
"$HADOOP_CMD" fs -rm -r -skipTrash "$OUTPUT_PATH" || true

# Step 1: submit the streaming job.
# The generic '-files' option replaces the deprecated per-file '-file' option
# and ships map.py and red.py to every task node.
"$HADOOP_CMD" jar "$STREAM_JAR_PATH" \
  -files ./map.py,./red.py \
  -input "$INPUT_FILE_PATH_1" \
  -output "$OUTPUT_PATH" \
  -mapper "python map.py" \
  -reducer "python red.py"
[root@master mapreduce_wordcount_python]# hadoop fs -put The_Man_of_Property.txt /
[root@master mapreduce_wordcount_python]# hadoop fs -ls /
Found 2 items
-rw-r--r-- 3 root supergroup 632207 2017-08-09 22:13 /The_Man_of_Property.txt
drwx------ - root supergroup 0 2017-08-07 10:20 /tmp
[root@master mapreduce_wordcount_python]# ls
map.py part-00000 red.py run.sh The_Man_of_Property.txt
[root@master mapreduce_wordcount_python]# ./run.sh
rmr: DEPRECATED: Please use 'rm -r' instead.
rmr: `/output': No such file or directory
17/08/09 22:13:19 WARN streaming.StreamJob: -file option is deprecated, please use generic option -files instead.
packageJobJar: [./map.py, ./red.py, /tmp/hadoop-unjar6173691576155728852/] [] /tmp/streamjob2384986044287166108.jar tmpDir=null
17/08/09 22:13:21 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.183.100:8032
17/08/09 22:13:22 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.183.100:8032
17/08/09 22:13:23 INFO mapred.FileInputFormat: Total input paths to process : 1
17/08/09 22:13:23 INFO mapreduce.JobSubmitter: number of splits:2
17/08/09 22:13:23 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1502176590410_0001
17/08/09 22:13:24 INFO impl.YarnClientImpl: Submitted application application_1502176590410_0001
17/08/09 22:13:24 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1502176590410_0001/
17/08/09 22:13:24 INFO mapreduce.Job: Running job: job_1502176590410_0001
17/08/09 22:13:38 INFO mapreduce.Job: Job job_1502176590410_0001 running in uber mode : false
17/08/09 22:13:38 INFO mapreduce.Job: map 0% reduce 0%
17/08/09 22:14:02 INFO mapreduce.Job: map 100% reduce 0%
17/08/09 22:14:13 INFO mapreduce.Job: map 100% reduce 100%
17/08/09 22:14:14 INFO mapreduce.Job: Job job_1502176590410_0001 completed successfully
17/08/09 22:14:14 INFO mapreduce.Job: Counters: 49
File System Counters
FILE: Number of bytes read=1076444
FILE: Number of bytes written=2480562
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=635780
HDFS: Number of bytes written=181530
HDFS: Number of read operations=9
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=2
Launched reduce tasks=1
Data-local map tasks=2
Total time spent by all maps in occupied slots (ms)=41881
Total time spent by all reduces in occupied slots (ms)=9342
Total time spent by all map tasks (ms)=41881
Total time spent by all reduce tasks (ms)=9342
Total vcore-seconds taken by all map tasks=41881
Total vcore-seconds taken by all reduce tasks=9342
Total megabyte-seconds taken by all map tasks=42886144
Total megabyte-seconds taken by all reduce tasks=9566208
Map-Reduce Framework
Map input records=2866
Map output records=111783
Map output bytes=852872
Map output materialized bytes=1076450
Input split bytes=188
Combine input records=0
Combine output records=0
Reduce input groups=16984
Reduce shuffle bytes=1076450
Reduce input records=111783
Reduce output records=16984
Spilled Records=223566
Shuffled Maps =2
Failed Shuffles=0
Merged Map outputs=2
GC time elapsed (ms)=616
CPU time spent (ms)=5980
Physical memory (bytes) snapshot=497496064
Virtual memory (bytes) snapshot=2538315776
Total committed heap usage (bytes)=256647168
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=635592
File Output Format Counters
Bytes Written=181530
17/08/09 22:14:14 INFO streaming.StreamJob: Output directory: /output
[root@slave1 hadoop-2.6.1]# hadoop fs -ls /
Found 3 items
-rw-r--r-- 3 root supergroup 632207 2017-08-09 22:13 /The_Man_of_Property.txt
drwxr-xr-x - root supergroup 0 2017-08-09 22:14 /output
drwx------ - root supergroup 0 2017-08-07 10:20 /tmp
[root@slave1 hadoop-2.6.1]# hadoop fs -ls /output
Found 2 items
-rw-r--r-- 3 root supergroup 0 2017-08-09 22:14 /output/_SUCCESS
-rw-r--r-- 3 root supergroup 181530 2017-08-09 22:14 /output/part-00000
[root@slave1 hadoop-2.6.1]# hadoop fs -text /output/part-00000 | head
(Baynes 1
(Dartie 1
(Dartie’s 1
(Down-by-the-starn) 2
(Down-by-the-starn), 1
(He 1
(I 1
(James) 1
(L500) 1
(Louisa 1
text: Unable to write to output stream.
[root@slave1 hadoop-2.6.1]#
Hadoop2_mapreduce_wordcount
最新推荐文章于 2021-07-03 11:27:53 发布