简介
- MapJoin 适用于小表关联大表的场景:将小表在 Map 阶段缓存到内存中,从而在 Map 端完成表的 join 操作
- MapJoin没有Reduce阶段
代码
Map
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
/**
 * Mapper for a map-side join.
 *
 * <p>{@link #setup} loads the first registered cache file into an in-memory
 * lookup table (first space-separated field -> second field). {@link #map}
 * then replaces the second field of every input record with the value found
 * in that table and emits {@code field0 \t lookedUpValue \t field2} as the
 * output key, with a {@link NullWritable} value.
 */
public class map extends Mapper<LongWritable, Text, Text, NullWritable> {
    /** Lookup table built in {@link #setup}: first field of a cache line -> second field. */
    Map<String, String> map = new HashMap<>();
    /** Output key instance reused across {@link #map} calls. */
    Text k = new Text();

    /**
     * Reads the first cache file line by line and fills the lookup table.
     *
     * @param context task context supplying the registered cache files
     * @throws IOException if no cache file is registered or the file cannot be read
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles();
        if (cacheFiles == null || cacheFiles.length == 0) {
            // Fail with a clear message instead of a NullPointerException /
            // ArrayIndexOutOfBoundsException further down.
            throw new IOException("No cache file registered for the map-side join");
        }
        String path = cacheFiles[0].getPath();
        // try-with-resources closes the reader (and the underlying stream) even on failure.
        // The charset is pinned to UTF-8 so the lookup keys are decoded the same way as
        // the input records, which arrive through Hadoop's UTF-8 based Text.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] words = line.split(" ");
                if (words.length < 2) {
                    // Blank or truncated line: nothing to index. Count it so it is visible.
                    context.getCounter("MapJoin", "MalformedCacheLines").increment(1);
                    continue;
                }
                map.put(words[0], words[1]);
            }
        }
    }

    /**
     * Joins one input record against the lookup table.
     *
     * <p>The record is split on single spaces; field 1 is used as the lookup key.
     * When the key has no entry in the table the literal text {@code null} is
     * written in its place (string concatenation of a {@code null} reference).
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   one input record
     * @param context task context used to emit the joined record
     * @throws IOException if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] s = value.toString().split(" ");
        if (s.length < 3) {
            // A blank or truncated record would otherwise kill the task with
            // ArrayIndexOutOfBoundsException; skip it and make the skip countable.
            context.getCounter("MapJoin", "MalformedInputRecords").increment(1);
            return;
        }
        String brand = map.get(s[1]);
        k.set(s[0] + "\t" + brand + "\t" + s[2]);
        context.write(k, NullWritable.get());
    }
}
Driver
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.File;
import java.net.URI;
/**
 * Driver for the map-side join job.
 *
 * <p>Removes a previous local output directory if one exists, then configures
 * and submits a map-only job that uses {@code map} as its mapper and ships
 * {@code pd.txt} to the tasks as a cache file.
 */
public class dri {
    /**
     * Entry point: clears the old output directory (the job cannot write into
     * an existing one) and runs the job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if job configuration or execution fails
     */
    public static void main(String[] args) throws Exception {
        File file = new File("D:\\MapJoin\\output");
        if (file.exists()) {
            delFile(file);
        }
        driver();
    }

    /**
     * Recursively deletes {@code file}: children first, then the file or
     * directory itself. A failed deletion is reported on standard error
     * instead of being ignored.
     *
     * @param file file or directory to delete
     */
    public static void delFile(File file) {
        File[] files = file.listFiles();
        // listFiles() returns null for a regular file (or on an I/O error).
        if (files != null) {
            for (File child : files) {
                delFile(child);
            }
        }
        if (!file.delete() && file.exists()) {
            System.err.println("Could not delete " + file.getAbsolutePath());
        }
    }

    /**
     * Configures and runs the job, then terminates the JVM with exit status 0
     * on success and 1 on failure.
     *
     * @throws Exception if job configuration or execution fails
     */
    public static void driver() throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(dri.class);
        job.setMapperClass(map.class);

        // The join is completed entirely in the mapper, so run no reducers:
        // without this the framework still shuffles the map output through a
        // default single reduce task.
        job.setNumReduceTasks(0);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        // With zero reducers the map output is the job output; declare its types too.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // File loaded by the mapper's setup() as the in-memory side of the join.
        job.addCacheFile(new URI("file:///D:/MapJoin/input/pd.txt"));

        FileInputFormat.setInputPaths(job, "D:\\MapJoin\\input\\order.txt");
        FileOutputFormat.setOutputPath(job, new Path("D:\\MapJoin\\output"));

        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}