题目:
使用Combiner进行词频统计
思路:
Combiner在每个Map任务输出溢写/合并时进行本地合并(map端、每个任务可能执行零次或多次,而不是整个Map阶段结束后只执行一次),可以理解为提前在map端做的一次局部reduce,用于减少shuffle传输的数据量
代码:
//MyCombiner
package A_Combiner02;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Word-count combiner: performs map-side partial aggregation of (word, count)
 * pairs emitted by a single map task, so less intermediate data is shuffled
 * to the reducers.
 *
 * <p>A combiner must be associative and commutative (summation is), because
 * the framework may invoke it zero, one, or several times per key.
 */
public class MyCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    // Reused output value object — standard Hadoop idiom to avoid allocating
    // one writable per key.
    private final LongWritable v = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate in a long: the inputs are LongWritable, and the original
        // int accumulator silently truncated via compound-assignment narrowing
        // (cnt += lw.get()) and could overflow on large counts.
        long sum = 0L;
        for (LongWritable lw : values) {
            sum += lw.get();
        }
        v.set(sum);
        context.write(key, v);
    }
}
//MapReduce
package A_Combiner02;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Word-count driver demonstrating the use of a Combiner for map-side
 * pre-aggregation. Reads /test/input/t3.txt from HDFS and writes word totals
 * to /test/output/08.
 */
public class MainDemo {

    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            // Target the HDFS cluster rather than the local file system.
            conf.set("fs.defaultFS", "hdfs://hadoop105:9000");

            Job job = Job.getInstance(conf, "A_Combiner02-MainDemo");
            job.setJarByClass(MainDemo.class);

            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);

            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);

            // Install the combiner: sums counts per word within each map task
            // before shuffle, shrinking the data sent to the reducers.
            job.setCombinerClass(MyCombiner.class);

            FileInputFormat.addInputPath(job, new Path("/test/input/t3.txt"));
            FileOutputFormat.setOutputPath(job, new Path("/test/output/08"));

            // Conventional Hadoop driver exit codes: 0 on success, 1 on failure.
            int exitCode = job.waitForCompletion(true) ? 0 : 1;
            System.exit(exitCode);
        } catch (Exception e) {
            // Boundary catch: report the failure AND exit non-zero. The original
            // only printed the trace, so a failed submission still exited 0 and
            // looked successful to calling scripts.
            e.printStackTrace();
            System.exit(1);
        }
    }

    /** Mapper: emits (word, 1) for every space-delimited token of each line. */
    public static class MyMapper extends Mapper<Object, Text, Text, LongWritable> {
        // Reused key/value writables to avoid per-record allocation.
        private final Text outKey = new Text();
        private final LongWritable one = new LongWritable(1L);

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                // split(" ") yields empty strings for consecutive spaces; the
                // original counted those as words. Skip them.
                if (token.isEmpty()) {
                    continue;
                }
                outKey.set(token);
                context.write(outKey, one);
            }
        }
    }

    /** Reducer: sums all (combiner-pre-aggregated) partial counts per word. */
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        private final LongWritable total = new LongWritable();

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // BUG FIX: the original wrote each incoming value as its own output
            // record, so a word seen by several map tasks produced several
            // partial-sum lines instead of one total. The combiner only
            // pre-sums within a single map task; the reducer must still
            // aggregate the partial sums it receives.
            long sum = 0L;
            for (LongWritable lw : values) {
                sum += lw.get();
            }
            total.set(sum);
            context.write(key, total);
        }
    }
}