1.数据:每条数据两个值表示线段的起点和终点位置
目标:统计每个点重叠(至少等于2)的线段个数
思路:
Map任务:映射每条数据经过的所有点位置为key,<key,1>
Reduce任务:累加相同key的value,大于1的则输出
2.代码
/**
 * MapReduce job that counts, for every integer point, how many line segments
 * cover it, and outputs only the points covered by at least two segments.
 *
 * Input format: one segment per line as "start,end" (both ends inclusive).
 * Output format: <point, overlapCount> for every point with overlapCount >= 2.
 */
public class Line {

    /**
     * Mapper: expands each segment "start,end" into one <point, 1> pair for
     * every integer point the segment covers (endpoints inclusive).
     */
    public static class LineMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

        // Reuse Writable instances across records instead of allocating new
        // objects per emitted pair — standard Hadoop idiom to reduce GC churn.
        private static final IntWritable ONE = new IntWritable(1);
        private final IntWritable point = new IntWritable();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, IntWritable, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Segment endpoints: split[0] = start, split[1] = end.
            // trim() tolerates stray whitespace around the comma.
            String[] split = value.toString().split(",");
            int start = Integer.parseInt(split[0].trim());
            int end = Integer.parseInt(split[1].trim());
            for (int i = start; i <= end; i++) {
                point.set(i);
                context.write(point, ONE);
            }
        }
    }

    /**
     * Reducer: sums the 1s for each point; emits the point only when the
     * total is at least 2, i.e. when segments actually overlap there.
     */
    public static class LineReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values,
                Reducer<IntWritable, IntWritable, IntWritable, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable v : values) {
                count += v.get();
            }
            // Only points with overlap (count >= 2) are emitted.
            if (count > 1) {
                result.set(count);
                context.write(key, result);
            }
        }
    }

    /**
     * Job driver. Optional arguments: args[0] = input path, args[1] = output
     * path; defaults to the original hard-coded HDFS paths when absent.
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("fs.defaultFS", "hdfs://hdp-nn-01:9000/");

        // BUG FIX: the Configuration must be passed to getInstance(); the
        // original Job.getInstance() ignored the yarn/fs.defaultFS settings
        // above and ran with a fresh default Configuration.
        Job job = Job.getInstance(conf);
        job.setJarByClass(Line.class);
        job.setMapperClass(LineMapper.class);
        job.setReducerClass(LineReducer.class);

        // Map output key/value classes must be set explicitly; otherwise the
        // framework assumes the map key is LongWritable and the job fails.
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        String inputPath = args.length > 0 ? args[0] : "/mrdata/line/input";
        String outputPath = args.length > 1 ? args[1] : "/mrdata/line/output";
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
3.结果