a.txt
id name
001 iphone6
002 xiaominote
003 mate7
004 nubia
005 meilan
b.txt
id orderid
001 00101
001 00110
002 01001
...
001 [iphone6-->a.txt, 00101-->b.txt, 00110-->b.txt]
select a.name,b.orderid from a,b where a.id=b.id
iphone6 00101
iphone6 00110
package com.test.hadoop.mr.JoinQuery;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Reduce-side join of two tab-separated datasets on their first column:
 *   a.txt: id \t name      (e.g. "001\tiphone6")
 *   b.txt: id \t orderid   (e.g. "001\t00101")
 * Equivalent SQL: select a.name, b.orderid from a, b where a.id = b.id
 * Output (TextOutputFormat): "<name>\t<orderid>", e.g. "iphone6\t00101".
 */
public class JoinQuery {

    /**
     * Tags every record with the name of the file it came from, so the reducer
     * can distinguish a-side (product) rows from b-side (order) rows.
     * Emits: key = join id, value = "&lt;second field&gt;--&gt;&lt;source file name&gt;",
     * e.g. k: 001, v: iphone6--&gt;a.txt
     */
    public static class JoinQueryMapper extends Mapper<LongWritable, Text, Text, Text> {
        // Reused across map() calls to avoid per-record allocation (standard Hadoop idiom).
        private final Text k = new Text();
        private final Text v = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String record = value.toString();
            String[] fields = StringUtils.split(record, "\t");
            // Guard: a blank or malformed line used to throw
            // ArrayIndexOutOfBoundsException and kill the task; skip it instead.
            if (fields == null || fields.length < 2) {
                return;
            }
            String id = fields[0];
            String name = fields[1];

            // The source file name ("a.txt" / "b.txt") is the tag the reducer branches on.
            FileSplit inputSplit = (FileSplit) context.getInputSplit();
            String fileName = inputSplit.getPath().getName();

            k.set(id);
            v.set(name + "-->" + fileName);
            // e.g. k: 001  v: iphone6-->a.txt
            context.write(k, v);
        }
    }

    /**
     * Performs the join for a single id.
     * values example for key 001: [iphone6--&gt;a.txt, 00101--&gt;b.txt, 00110--&gt;b.txt]
     * Emits one record per b-side value: key = product name, value = order id.
     */
    public static class JoinQueryReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // First pass: extract the single a-side field (the product name) and
            // buffer all b-side fields. NOTE(review): assumes at most one a-side
            // row per id and that the b-side fits in memory for one key.
            String leftKey = "";
            ArrayList<String> rightFields = new ArrayList<>();
            for (Text value : values) {
                if (value.toString().contains("a.txt")) {
                    leftKey = StringUtils.split(value.toString(), "-->")[0];
                } else {
                    rightFields.add(value.toString());
                }
            }
            // Second pass: pair the product name with every order id.
            for (String field : rightFields) {
                String orderId = StringUtils.split(field, "-->")[0];
                // BUG FIX: the original concatenated leftKey into the value AND used it
                // as the key, emitting "iphone6\tiphone6\t00101" instead of the expected
                // "iphone6\t00101". Write the name exactly once, as the key.
                context.write(new Text(leftKey), new Text(orderId));
            }
        }
    }

    /**
     * Job driver. Input and output paths may be supplied as args[0] and args[1];
     * when absent, the original hard-coded local paths are used, so existing
     * invocations keep working.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job joinJob = Job.getInstance(conf);
        joinJob.setJarByClass(JoinQuery.class);
        joinJob.setMapperClass(JoinQueryMapper.class);
        joinJob.setReducerClass(JoinQueryReducer.class);
        // Map output types match the final output types, so no separate
        // setMapOutputKeyClass/setMapOutputValueClass calls are required.
        joinJob.setOutputKeyClass(Text.class);
        joinJob.setOutputValueClass(Text.class);

        String inputPath = args.length > 0 ? args[0] : "D:/BaiduYunDownload/hadoopjoin/srcData";
        String outputPath = args.length > 1 ? args[1] : "D:/BaiduYunDownload/hadoopjoin/out";
        FileInputFormat.setInputPaths(joinJob, new Path(inputPath));
        FileOutputFormat.setOutputPath(joinJob, new Path(outputPath));

        // BUG FIX: the original ignored waitForCompletion's result, so the process
        // exited 0 even when the job failed. Propagate success/failure as the exit code.
        System.exit(joinJob.waitForCompletion(true) ? 0 : 1);
    }
}