Table a data:
id year bb
1 2010 1999
1 2011 1998
2 2010 1997
2 2011 1996
4 2010 1995
4 2011 1994
9 2010 1993
9 2011 1992
Table b data:
id address
1 哈哈1
2 哈哈2
3 哈哈3
4 哈哈4
5 哈哈5
6 哈哈6
7 哈哈7
8 哈哈8
9 哈哈9
Requirement: append an address field to each row of table a, taking the address from the row of table b whose id matches (a join on id).
Output:
1 1 2010 1999 哈哈1
1 1 2011 1998 哈哈1
2 2 2010 1997 哈哈2
2 2 2011 1996 哈哈2
4 4 2010 1995 哈哈4
4 4 2011 1994 哈哈4
9 9 2010 1993 哈哈9
9 9 2011 1992 哈哈9
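The code below implements a reduce-side join: the mapper tags each record with the name of the file it came from ("a#" or "b#") and emits the id as the key, so that all records sharing an id arrive at the same reduce call; the reducer then pairs every table-a record with the matching address from table b.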
Code:
package mapreduce.joinmapreducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class Join {
    // Mapper: tags each record with the table (file) it came from and keys it by id
    static class Map extends Mapper<LongWritable, Text, LongWritable, Text> {
        String splitStr = " ";
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Get the name of the file this input split belongs to
            String inputname = ((FileSplit) context.getInputSplit()).getPath().getName();
            String line = value.toString();
            // Skip empty lines
            if (line == null || line.equals(""))
                return;
            // Split the line on spaces and take the id from the first field
            String[] linesplite = line.split(splitStr);
            int id = Integer.parseInt(linesplite[0]);
            // Tag the record according to the table it came from
            switch (inputname) {
                case "a.txt":
                    context.write(new LongWritable(id), new Text("a# " + line)); break;
                case "b.txt":
                    context.write(new LongWritable(id), new Text("b# " + line)); break;
                default:
                    context.write(new LongWritable(id), new Text("c# " + line)); break;
            }
        }
    }
    // Reducer: for each id, joins every table-a row with the matching address from table b
    static class Red extends Reducer<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            List<String> a = new ArrayList<String>();
            List<String> b = new ArrayList<String>();
            for (Text t : values) {
                // Separate the source tag ("a#"/"b#") from the record payload
                String[] tagged = t.toString().split(" ", 2);
                if (tagged[0].equals("a#")) {
                    a.add(tagged[1]);                   // "id year bb"
                } else if (tagged[0].equals("b#")) {
                    b.add(tagged[1].split(" ", 2)[1]);  // keep only the address
                }
            }
            // Pair every a-row for this id with every matching address
            for (int i = 0; i < a.size(); i++) {
                for (int j = 0; j < b.size(); j++) {
                    context.write(key, new Text(a.get(i) + " " + b.get(j)));
                }
            }
        }
    }
    // Driver: configures and submits the join job
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(Join.class);
        // Output path
        Path outputpath = new Path("/data/testdata/output");
        // If the output path already exists, delete it
        FileSystem.get(configuration).delete(outputpath, true);
        // Set the mapper/reducer classes
        job.setMapperClass(Map.class);
        job.setReducerClass(Red.class);
        // Set the mapper/reducer output key and value types
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        // Set the input and output paths
        FileInputFormat.addInputPath(job, new Path("/data/testdata/"));
        FileOutputFormat.setOutputPath(job, outputpath);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
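To run the job, package the class into a jar, upload a.txt and b.txt to the input directory, and submit it with hadoop jar. A minimal example, assuming the jar is named join.jar and its manifest does not already specify a main class (both are assumptions, not part of the original post):

hdfs dfs -mkdir -p /data/testdata
hdfs dfs -put a.txt b.txt /data/testdata/
hadoop jar join.jar mapreduce.joinmapreducer.Join
hdfs dfs -cat /data/testdata/output/part-r-00000

The input and output paths are hardcoded in the driver, so no command-line arguments are needed; the joined rows appear in the part-r-00000 file under /data/testdata/output.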