环境:
Hadoop集群
1、编写Mapper类
package com.xue.mapreducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* @author 薛向毅
* @create 2018-09-01 19:33
**/
public class WordCountMapper extends Mapper<LongWritable,Text,Text,LongWritable> {
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
/*
* key : 输入的key
* value: 数据
* context: map上下文
* */
String data = value.toString();
//分词
String[] words = data.split(" ");
//输出每个单词
for (String word : words) {
context.write(new Text(word),new LongWritable(1));
}
}
}
2、编写Reducer类
package com.xue.mapreducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import javax.xml.soap.Text;
import java.io.IOException;
/**
* @author 薛向毅
* @create 2018-09-01 19:30
**/
public class WordCountReducer extends Reducer<Text,LongWritable,Text,LongWritable>{
protected void reduce(Text k3, Iterable<LongWritable> v3, Context context) throws IOException, InterruptedException {
//v3 是一个集合。 每个元素就是v2.
long total = 0;
for (LongWritable l : v3) {
total+=l.get();
}
context.write(k3,new LongWritable(total));
}
}
3、编写启动类
package com.xue.mapreducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the word-count job: configures the mapper, reducer, key/value
 * types and input/output paths, then submits the job and waits for it.
 *
 * @author 薛向毅
 * @create 2018-09-01 19:44
 **/
public class WordCountMain {
    /**
     * Configures and runs the word-count job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCountMain <input path> <output path>");
            System.exit(2);
        }

        // A job = map + reduce.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Entry point class, used to locate the jar that contains the job.
        job.setJarByClass(WordCountMain.class);

        // Mapper and its output types.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer and the final output types. The key class must be set with
        // setOutputKeyClass; calling setOutputValueClass twice leaves it unset.
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Input and output locations of the job.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and report its outcome through the process exit code.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
4、打成jar包并提交到Hadoop集群运行,例如:hadoop jar wordcount.jar com.xue.mapreducer.WordCountMain <输入路径> <输出路径>