通常情况下,文件的每一行是一个键-值对,键和值之间使用某个分隔符进行分割,比如制表符。TextOutputFormat(即Hadoop的默认输出格式)生成的就是这种文件。如果要正确处理这种文件,使用KeyValueTextInputFormat比较合适。
1、MaxTempMapper
package hadoop.mr.input.keyvaluetext;
import debugtool.RTUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* MaxTempMapper
*/
public class MaxTempMapper extends Mapper<Text, Text, IntWritable, IntWritable> {
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
context.write(new IntWritable(Integer.parseInt(key.toString())), new IntWritable(Integer.parseInt(value.toString())));
}
}
2、MaxTempReducer
package hadoop.mr.input.keyvaluetext;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
*/
public class MaxTempReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable>{
protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int max = Integer.MIN_VALUE ;
for(IntWritable iw : values){
max = max > iw.get() ? max : iw.get() ;
}
context.write(key,new IntWritable(max));
}
}
3、App
package hadoop.mr.input.keyvaluetext;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the max-temperature job using {@code KeyValueTextInputFormat},
 * which splits each input line into a (key, value) Text pair at a configurable
 * separator (here a single space).
 */
public class App {
    public static void main(String[] args) throws Exception {
        // Fall back to local demo paths only when no paths were supplied,
        // so real command-line arguments are no longer silently discarded.
        if (args == null || args.length < 2) {
            args = new String[]{"d:/java/mr/temp.dat", "d:/java/mr/out"};
        }
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Remove a stale output directory; Hadoop fails fast if it already exists.
        if (fs.exists(new Path(args[1]))) {
            fs.delete(new Path(args[1]), true);
        }
        Job job = Job.getInstance(conf);
        job.setJobName("maxTemp");
        job.setJarByClass(App.class);
        job.setMapperClass(MaxTempMapper.class);
        job.setReducerClass(MaxTempReducer.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Use KeyValueTextInputFormat so each line is split into a key/value pair
        // at the separator below (Hadoop's constant name is historically misspelled).
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.getConfiguration().set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, " ");
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(2);
        job.waitForCompletion(true);
    }
}
1、KeyValueTextInputFormat继承
FileInputFormat
2、
job.
waitForCompletion提交作业
3、调用
JobSubmitter类
4、
调用FileInputFormat的
getSplits方法
5、KeyValueTextInputFormat的isSplitable
6、调用KeyValueTextInputFormat 的createRecordReader 方法创建KeyValueLineRecordReader
7、Mapper的run方法
8、循环调用KeyValueLineRecordReader的nextKeyValue方法
9、返回key和value