Small-table data
Large-table data
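The original sample data is not reproduced here. Judging from the code below, both files are plain CSV, roughly along these lines (column names and values are hypothetical):

type.csv (small table): typeId,typeName — e.g. 1,fruit
goods (large table): goodsId,typeId,price,amount — e.g. 101,1,5.0,20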
Map-side inner join
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class MapInner {

    public static class MyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        // In-memory copy of the small table: type id -> type name
        private Map<String, String> myType = new HashMap<>();

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // Load the cached small table once per map task
            String filename = context.getCacheFiles()[0].getPath();
            BufferedReader reader = new BufferedReader(new FileReader(filename));
            String line;
            while ((line = reader.readLine()) != null) {
                String[] sps = line.split(",");
                myType.put(sps[0], sps[1]);
            }
            reader.close();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] goods = value.toString().split(",");
            // Replace the type id with the type name looked up from the small table
            // (assumes every record has a matching entry in type.csv)
            goods[1] = myType.get(goods[1]);
            context.write(new Text(Arrays.toString(goods)), NullWritable.get());
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(MapInner.class);
        FileInputFormat.addInputPath(job, new Path("e://source2"));
        job.setMapperClass(MyMapper.class);
        job.setNumReduceTasks(0); // map-side join: no reduce phase needed
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Ships the small table with the job; it is also registered in the
        // distributed cache, so setup() can find it via context.getCacheFiles()
        job.addFileToClassPath(new Path("e://source1/type.csv"));
        FileOutputFormat.setOutputPath(job, new Path("e://ff04"));
        job.waitForCompletion(true);
    }
}
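The e:// paths above only work when the job runs in local mode on Windows. On a real cluster the small table would normally be shipped through the distributed cache, as the commented-out addCacheFile line in the reduce-side driver below hints. A minimal driver sketch, reusing the mapper above and assuming hypothetical HDFS input/output paths:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.net.URI;

public class MapInnerCluster {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(MapInner.class);
        job.setMapperClass(MapInner.MyMapper.class);
        job.setNumReduceTasks(0); // map-only join
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Large table read as ordinary job input (path is hypothetical)
        FileInputFormat.addInputPath(job, new Path("/mydemo/goods"));
        // Small table distributed to every map task; note that on YARN the file is
        // localized into the task's working directory, so setup() may need to open
        // it by its base name ("type.csv") rather than by the full cache-file path
        job.addCacheFile(new URI("hdfs://192.168.56.100:9000/mydemo/type.csv"));
        FileOutputFormat.setOutputPath(job, new Path("/mydemo/out"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}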
Output
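(Each output line is just Arrays.toString of the joined record: a hypothetical input row 101,1,5.0,20 would come out as [101, fruit, 5.0, 20] under the assumed layout above.)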
Reduce-side inner join
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.njbdqn.utils.Tools;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Reduce-side join: both inputs are large files.
 */
public class MyReduceInner {

    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Find out which input file the current line came from
            String path = ((FileSplit) context.getInputSplit()).getPath().toString();
            String[] words = value.toString().split(",");
            // The join key sits in a different column depending on the file;
            // tag each value so the reducer can tell the two sources apart
            if (path.contains("type")) {
                context.write(new Text(words[0]), new Text("type:" + words[1]));
            } else {
                context.write(new Text(words[1]), new Text("context:" + words[0] + ":" + words[1] + ":" + words[2] + ":" + words[3]));
            }
        }
    }

    public static class MyReduce extends Reducer<Text, Text, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // All values for one join key arrive together; separate the two kinds of records.
            // Copy the iterator's contents into a list first: Hadoop reuses the same Text
            // instance across iterations, so each value must be copied out.
            // List<Text> lst = Lists.newArrayList(values);
            List<Text> lst = new ArrayList<>();
            for (Text tx : values) {
                lst.add(new Text(tx.toString()));
            }
            // Find the value tagged "type:" to get the category name
            String typeInfo = "";
            for (Text tx : lst) {
                String val = tx.toString();
                if (val.startsWith("type:")) {
                    typeInfo = val.substring(val.indexOf(":") + 1);
                    // Remove it from the list (safe here: we break right away,
                    // so the iterator is never advanced after the removal)
                    lst.remove(tx);
                    break;
                }
            }
            // For the remaining records, substitute the type name for the type id
            for (Text tx : lst) {
                String[] infos = tx.toString().split(":");
                infos[2] = typeInfo;
                // Drop the leading "context" tag so only the joined record is emitted
                String[] record = Arrays.copyOfRange(infos, 1, infos.length);
                context.write(new Text(Arrays.toString(record)), NullWritable.get());
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Tools.checkPoint(); // project-specific helper from org.njbdqn.utils
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(MyReduceInner.class);
        FileInputFormat.addInputPath(job, new Path("e://source3"));
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // job.addCacheFile(new URI("hdfs://192.168.56.100:9000/mydemo/type.csv"));
        // job.addFileToClassPath(new Path("e://source1/type.csv"));
        FileOutputFormat.setOutputPath(job, new Path("e://pp"));
        job.waitForCompletion(true);
    }
}
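To trace one reduce group under the assumed layout (values hypothetical): for join key 1 the reducer receives type:fruit and context:101:1:5.0:20; it pulls out fruit as typeInfo, drops that entry from the list, then rewrites the remaining record and emits [101, fruit, 5.0, 20].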
Output
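In short: the map-side join avoids the shuffle entirely but requires the small table to fit in each mapper's memory, while the reduce-side join can handle two large tables at the cost of shuffling every record by the join key.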