--==============mapreduce编写格式=============================
=》概述:map和reduce遵循的格式
map:(K1,V1)->list(K2,V2)
reduce:(K2,list(V2))->list(K3,V3)
Context是上下文
=》Mapper基类
protected void map(KEY key,VALUE value,Context context)
throws IOException,InterruptedException{
}
=》Reducer的基类
protected void reduce(KEY key,Iterable<VALUE> value,Context context)
throws IOException,InterruptedException{
}
=》框架的代码
//map 和reduce 区
class MR{
public static class Mapper...{
//map代码区
}
public static class Reducer...{
//reduce代码区
}
}
//Driver 区(组装)
main(){
Configuration conf=new Configuration();
Job job=Job.getInstance(conf,"job name");
job.setJarByClass(thisMainClass.class);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
FileInputFormat.addInputPath(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
//其它配置参数代码
job.waitForCompletion(true);
}
bin/yarn jar ./jars/mr-wordcount.jar /user/npl/wordcount/input /user/npl/wordcount/output
=》概述:map和reduce遵循的格式
map:(K1,V1)->list(K2,V2)
reduce:(K2,list(V2))->list(K3,V3)
Context是上下文
=》Mapper基类
protected void map(KEY key,VALUE value,Context context)
throws IOException,InterruptedException{
}
=》Reducer的基类
protected void reduce(KEY key,Iterable<VALUE> value,Context context)
throws IOException,InterruptedException{
}
=》框架的代码
//map 和reduce 区
class MR{
public static class Mapper...{
//map代码区
}
public static class Reducer...{
//reduce代码区
}
}
//Driver 区(组装)
main(){
Configuration conf=new Configuration();
Job job=Job.getInstance(conf,"job name");
job.setJarByClass(thisMainClass.class);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
FileInputFormat.addInputPath(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
//其它配置参数代码
job.waitForCompletion(true);
}
--==============mapreduce示例=============================
package com.npl.hadoop.senier.hdfs;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.yarn.webapp.hamlet.HamletSpec.B;
/**
 * Classic MapReduce word count: counts occurrences of each
 * whitespace-separated token across the input files.
 *
 * <p>Usage: {@code yarn jar mr-wordcount.jar <input-path> <output-path>}
 */
public class WordCount {

    // Step 1: Mapper — splits each input line into words and emits <word, 1>.
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        // Writables are reused across map() calls to avoid per-record allocation.
        private Text mapOutputKey = new Text();
        private final static IntWritable mapOutputValue = new IntWritable(1);

        /**
         * @param key   byte offset of the line within the split (unused)
         * @param value one line of input text
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String lineValue = value.toString();
            // StringTokenizer splits on whitespace; emit <token, 1> per token.
            StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
            while (stringTokenizer.hasMoreTokens()) {
                mapOutputKey.set(stringTokenizer.nextToken());
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }

    // Step 2: Reducer — sums the grouped counts for each word.
    // Shuffle groups identical keys: <hadoop,1> <hadoop,1> -> <hadoop, list(1,1)>.
    public static class WordCountReduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused output value; set(sum) overwrites it every call, so it needs
        // no initial value (the original's "new IntWritable(1)" was misleading).
        private IntWritable reduceOutputValue = new IntWritable();

        /**
         * @param key    a word
         * @param values all counts emitted for that word
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            reduceOutputValue.set(sum);
            context.write(key, reduceOutputValue);
        }
    }

    // Step 3: Driver — configures, assembles, and submits the job.
    // (The original comment here wrongly said "step 2: Map Class".)

    /**
     * Builds and runs the word-count job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure (shell exit-code convention)
     * @throws Exception if job setup or submission fails
     */
    public int run(String[] args) throws Exception {
        // Fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException when paths are missing.
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input-path> <output-path>");
            return 1;
        }
        // conf & jar
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());
        // input
        Path inpath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inpath);
        // mapper class
        job.setMapperClass(WordCountMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // shuffle hooks (unused here): partitioner, sort, combiner, grouping
        // job.setPartitionerClass(a.class);
        // job.setSortComparatorClass(B.class);
        // job.setCombinerClass(c.class);
        // job.setGroupingComparatorClass(d.class);
        // reducer class
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // output (directory must not already exist)
        Path outpath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outpath);
        // submit job and block until it finishes; primitive boolean instead
        // of the original boxed Boolean.
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    /** Entry point: runs the job and exits with its status code. */
    public static void main(String[] args) throws Exception {
        int status = new WordCount().run(args);
        System.exit(status);
    }
}
--==========打成jar包在yarn上运行=====================
工程->【export】-> 【java】->【jar file】bin/yarn jar ./jars/mr-wordcount.jar /user/npl/wordcount/input /user/npl/wordcount/output