编写 MapReduce(MR)程序时,很多代码都是重复的样板代码,真正需要编写的业务逻辑只在 map 和 reduce 方法中。
以下就是 MR 程序的模板:
package com.wangxj.hadoop.mr;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Skeleton MapReduce job: a mapper, a reducer and a {@link Tool}-based driver.
 *
 * <p>Copy this class and fill in the places marked {@code //todo}. The driver
 * expects two command-line arguments: the input path and the output path.
 */
public class ModuleMapReduce extends Configured implements Tool {

    // 1. map class
    /**
     * Mapper template. The four type parameters follow
     * {@code Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>}.
     *
     * @author Administrator
     */
    //todo: adjust the type parameters to match the job's data
    public static class ModuleMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        /**
         * Invoked by the framework for each input record.
         *
         * @param key     input key of the record
         * @param value   input value of the record
         * @param context used to emit output key/value pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            //todo: map logic goes here
        }
    }

    // 2. reduce class
    /**
     * Reducer template. The four type parameters follow
     * {@code Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>}.
     *
     * @author Administrator
     */
    //todo: adjust the type parameters to match the job's data
    public static class ModuleReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Invoked by the framework for each key together with its values.
         *
         * @param key     the key being reduced
         * @param values  the values associated with {@code key}
         * @param context used to emit output key/value pairs
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            //todo: reduce logic goes here
        }
    }

    // 3. driver: assembles and submits the job
    /**
     * Configures the job (input -> map -> reduce -> output), submits it and
     * waits for it to finish.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if fewer than two
     *         arguments were supplied
     */
    @Override
    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + getClass().getSimpleName() + " <input path> <output path>");
            return 2;
        }

        // get configuration
        Configuration configuration = getConf();

        // create job
        Job job = Job.getInstance(configuration, getClass().getSimpleName());

        // run jar
        job.setJarByClass(getClass());

        // input
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);

        // map
        job.setMapperClass(ModuleMapper.class);
        //todo: keep in sync with ModuleMapper's output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // reduce
        job.setReducerClass(ModuleReducer.class);
        //todo: keep in sync with ModuleReducer's output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // output
        Path outPath = new Path(args[1]);
        // Resolve the file system from the OUTPUT path itself: the input may
        // live on a different file system, and Hadoop paths (globs, spaces)
        // are not always valid java.net.URI strings.
        FileSystem fileSystem = outPath.getFileSystem(configuration);
        // NOTE: an existing output directory is removed recursively so the
        // job can be re-run against the same output path.
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        // submit job and block until it completes
        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }

    // run program
    /**
     * Entry point: runs the job through {@link ToolRunner} and exits with the
     * status returned by {@link #run(String[])}.
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int status = ToolRunner.run(configuration, new ModuleMapReduce(), args);
        System.exit(status);
    }
}
根据实际情况,补充上面代码中以 //todo 注释标记的部分即可。