1. WordCount code
package com.test;

import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the line on whitespace and common punctuation, then emit (word, 1) per token.
            StringTokenizer itr = new StringTokenizer(value.toString(), " \"\t\n\r\f,.:;?![]'");
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum all partial counts for the same word.
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the Job constructor deprecated in Hadoop 2.x.
        Job job = Job.getInstance(conf, "WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        // The reducer doubles as a combiner because integer summation is associative and commutative.
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.174.129:9000/wcdata"));

        // MapReduce refuses to run if the output directory already exists, so delete it first.
        Path owcpath = new Path("hdfs://192.168.174.129:9000/owcdata");
        FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.174.129:9000"), conf);
        if (fs.exists(owcpath)) {
            if (fs.delete(owcpath, true)) {
                System.out.println("del success");
            } else {
                System.out.println("del error");
            }
        }
        FileOutputFormat.setOutputPath(job, owcpath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
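Note: the JobSubmitter warning in the run log below ("Implement the Tool interface and execute your application with ToolRunner") points at a cleaner driver shape. A minimal sketch, assuming a hypothetical driver class WordCountDriver that reuses the mapper and reducer above and takes the input and output paths from the command line instead of hard-coding them:

package com.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical ToolRunner-based driver: generic Hadoop options (-D, -files, ...)
// are parsed before run() sees the remaining arguments.
public class WordCountDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "WordCount");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCount.WordCountMapper.class);
        job.setCombinerClass(WordCount.WordCountReducer.class);
        job.setReducerClass(WordCount.WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input path from CLI
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path from CLI
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new WordCountDriver(), args));
    }
}

With this variant the job would be submitted as, for example, hadoop jar WordCount.jar com.test.WordCountDriver /wcdata /owcdata, and options such as -D mapreduce.job.reduces=2 would be handled by ToolRunner.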
2. Create a directory for the compiled class files
Create a WordCount directory under /home/hadoop: mkdir /home/hadoop/WordCount
Create a WordCount_class directory under WordCount: mkdir /home/hadoop/WordCount/WordCount_class
3. Compile WordCount.java
javac -classpath $HADOOP_HOME/share/hadoop/common/hadoop-common-2.6.0.jar:$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0.jar -d /home/hadoop/WordCount/WordCount_class /home/hadoop/workspace/HelloWorld/src/com/test/WordCount.java
Notes:
1) The directory after -d is where the compiled class files are written.
2) /home/hadoop/workspace/HelloWorld/src/com/test/WordCount.java is the path to the WordCount source file in the project.
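3) The command above hard-codes the 2.6.0 jar versions. On any Hadoop 2.x release, the hadoop classpath subcommand prints the client classpath of the local installation, so the same compile can be written version-independently (a sketch, using the same source and output directories as above):
javac -classpath "$(hadoop classpath)" -d /home/hadoop/WordCount/WordCount_class /home/hadoop/workspace/HelloWorld/src/com/test/WordCount.java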
4. Package the class files from the previous step into a runnable jar file:
jar -cvf ./WordCount.jar -C /home/hadoop/WordCount/WordCount_class .
Note: the -C option is required, and the trailing "." must not be dropped.
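Before submitting, the package layout can be checked by listing the jar contents; com/test/WordCount.class plus the two inner classes (WordCount$WordCountMapper.class and WordCount$WordCountReducer.class) should appear:
jar -tf WordCount.jar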
5. Submit the WordCount job to the Hadoop cluster:
bin/hadoop jar WordCount.jar com.test.WordCount
Note: the first argument after jar is the compiled jar file; the second is the main class of the Java source, given as package name plus class name.
6. Execution output
[hadoop@Master WordCount]$ hadoop jar WordCount.jar com.test.WordCount
16/10/06 23:29:18 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
del success
16/10/06 23:29:21 INFO client.RMProxy: Connecting to ResourceManager at Master/192.168.174.129:8032
16/10/06 23:29:23 WARN mapreduce.JobSubmitter: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
16/10/06 23:29:24 INFO input.FileInputFormat: Total input paths to process : 1
16/10/06 23:29:24 INFO mapreduce.JobSubmitter: number of splits:1
16/10/06 23:29:25 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1475805031685_0002
16/10/06 23:29:26 INFO impl.YarnClientImpl: Submitted application application_1475805031685_0002
16/10/06 23:29:26 INFO mapreduce.Job: The url to track the job: http://Master:8088/proxy/application_1475805031685_0002/
16/10/06 23:29:26 INFO mapreduce.Job: Running job: job_1475805031685_0002
16/10/06 23:29:56 INFO mapreduce.Job: Job job_1475805031685_0002 running in uber mode : false
16/10/06 23:29:56 INFO mapreduce.Job: map 0% reduce 0%
16/10/06 23:30:40 INFO mapreduce.Job: map 100% reduce 0%
16/10/06 23:31:03 INFO mapreduce.Job: map 100% reduce 100%
16/10/06 23:31:04 INFO mapreduce.Job: Job job_1475805031685_0002 completed successfully
16/10/06 23:31:04 INFO mapreduce.Job: Counters: 49
    File System Counters
        FILE: Number of bytes read=105
        FILE: Number of bytes written=210923
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=157
        HDFS: Number of bytes written=63
        HDFS: Number of read operations=6
        HDFS: Number of large read operations=0
        HDFS: Number of write operations=2
    Job Counters
        Launched map tasks=1
        Launched reduce tasks=1
        Data-local map tasks=1
        Total time spent by all maps in occupied slots (ms)=40978
        Total time spent by all reduces in occupied slots (ms)=19816
        Total time spent by all map tasks (ms)=40978
        Total time spent by all reduce tasks (ms)=19816
        Total vcore-seconds taken by all map tasks=40978
        Total vcore-seconds taken by all reduce tasks=19816
        Total megabyte-seconds taken by all map tasks=41961472
        Total megabyte-seconds taken by all reduce tasks=20291584
    Map-Reduce Framework
        Map input records=2
        Map output records=9
        Map output bytes=81
        Map output materialized bytes=105
        Input split bytes=109
        Combine input records=9
        Combine output records=9
        Reduce input groups=9
        Reduce shuffle bytes=105
        Reduce input records=9
        Reduce output records=9
        Spilled Records=18
        Shuffled Maps =1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=857
        CPU time spent (ms)=6620
        Physical memory (bytes) snapshot=286318592
        Virtual memory (bytes) snapshot=4120993792
        Total committed heap usage (bytes)=137936896
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters
        Bytes Read=48
    File Output Format Counters
        Bytes Written=63
[hadoop@Master WordCount]$
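The counters show one reduce task and 9 reduce output records, so with the default single reducer the result sits in one part file under the output directory and can be read back directly:
hadoop fs -cat hdfs://192.168.174.129:9000/owcdata/part-r-00000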