一、问题描述
任务要求:现有一批电话通信清单,记录了用户A拨打用户B的记录;需要做一个倒排索引,记录拨打给用户B的所有用户A。
输入文件 reverse.txt 的内容如下(每行一条记录,格式为“主叫号码 被叫号码”,以空格分隔):
13599999999 10086
13899999999 120
13544444444 18955555555
18922222222 18955555555
18900000000 120
13544444444 120
13544444444 110
13544444444 119
13899999999 18955555555
要求使用MapReduce
主叫以‘|’分割。
二、算法思路
Map 阶段把每行记录按空格拆成主叫、被叫两个号码,并输出一个键值对;Shuffle 阶段把相同键的所有值汇集到一起;Reduce 阶段把同一键下的所有值用‘|’拼接成一行输出。
三、MapReduce程序
package com.cl.hadoop.sort;
import com.cl.hadoop.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class ReverseSort {
public static class Map extends Mapper<LongWritable, Text, Text, Text> {
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] lineSplit = value.toString().split(" ");
String anum = lineSplit[0];
String bnum = lineSplit[1];
context.write(new Text(anum), new Text(bnum));
}
}
public static class Reduce extends Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
StringBuffer out = new StringBuffer();
for (Text valus : values) {
out.append(valus).append("|");
}
context.write(key, new Text(out.substring(0, out.length() - 1).toString()));
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
FileUtil.deleteDir("output");
String[] otherargs = new String[]{"input/sort", "output"};
if (otherargs.length != 2) {
System.err.println("Usage: mergesort <in> <out>");
System.exit(2);
}
Job job = Job.getInstance();
job.setJarByClass(ReverseSort.class);
job.setMapperClass(ReverseSort.Map.class);
job.setReducerClass(ReverseSort.Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(otherargs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherargs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
运行之后,输出结果如下(每行格式为:键、分隔符、以‘|’拼接的值列表;同一键下各值的先后顺序不保证固定)。
13544444444 119|110|120|18955555555
13599999999 10086
13899999999 18955555555|120
18900000000 120
18922222222 18955555555