如何理解 Mapper&lt;LongWritable, Text, Text, IntWritable&gt; 和 Reducer&lt;Text, IntWritable, Text, IntWritable&gt;
1. Mapper
importjava.io.IOException;
importorg.apache.hadoop.io.IntWritable;
importorg.apache.hadoop.io.LongWritable;
importorg.apache.hadoop.io.Text;
importorg.apache.hadoop.mapreduce.Mapper;
/**
*LongWritable偏移量long,表示该行在文件中的位置,而不是行号
*Textmap阶段的输入数据一行文本信息字符串类型String
*Textmap阶段的数据字符串类型String
*IntWritablemap阶段输出的value类型,对应java中的int型,表示行号
*/
publicclassWorkCountMapextendsMapper<LongWritable,Text,Text,IntWritable>{
/**
*key输入的键
*value输入的值
*context上下文对象
*/
@Override
protectedvoidmap(LongWritablekey,Textvalue,Contextcontext)
throwsIOException,InterruptedException{
Stringline=value.toString();
String[]words=line.split("/t");//分词
for(Stringword:words{
TextwordText=newText(word);
IntWritableoutValue=newIntWritable();
//写出
context.write(wordText,outValue);
}
}
}
2. Reducer
reduce 阶段的输入是 mapper 阶段的输出
importjava.io.IOException;
importorg.apache.hadoop.io.IntWritable;
importorg.apache.hadoop.io.Text;
importorg.apache.hadoop.mapreduce.Reducer;
/**
*Text 数据类型:字符串类型String
*IntWritable reduce阶段的输入类型int
*Textreduce阶段的输出数据类型String类型
*IntWritable输出词频个数Int型
*/
publicclassWorkCountReduceextendsReducer<Text,IntWritable,Text,IntWritable>{
/**
*key输入的键
*value输入的值
*context上下文对象,用于输出键值对
*/
@Override
protectedvoidreduce(Textkey,Iterable<IntWritable>value,
Contextcontext)throwsIOException,InterruptedException{
intsum=0;
for(IntWritablenumber:value{
sum+=number.get();
}
//单词 个数 hadoop 10
context.write(key,newIntWritable(sum));
}
}