1、order.txt
1 1 3 4
2 21 4 4
3 12 2 9
4 1 3 4
5 2 1 2
2、product.txt
1 1 小米
2 2 华为
3 3 一加
4 4 荣耀
3、Order entity class
package com.example.hadoop.reduceJoin;

import lombok.Data;
import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

@Data
public class Order implements Writable {
    /**
     * order id
     */
    private Integer id;
    /**
     * product id
     */
    private Integer productId;
    /**
     * product name
     */
    private String name;
    /**
     * quantity
     */
    private Integer num;
    /**
     * record type flag: 0 = order record, 1 = product record
     */
    private Integer flag;

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(id);
        dataOutput.writeInt(productId);
        dataOutput.writeUTF(name);
        dataOutput.writeInt(num);
        dataOutput.writeInt(flag);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.id = dataInput.readInt();
        this.productId = dataInput.readInt();
        this.name = dataInput.readUTF();
        this.num = dataInput.readInt();
        this.flag = dataInput.readInt();
    }

    @Override
    public String toString() {
        return "orderId=" + id +
                ",productId=" + productId +
                ",name=" + name +
                ",num=" + num;
    }
}
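As a quick sanity check (not part of the job itself), a hypothetical round trip through write() and readFields() using plain java.io streams shows that the two methods stay symmetric:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class OrderRoundTripSketch {
    public static void main(String[] args) throws IOException {
        Order order = new Order();
        order.setId(1);
        order.setProductId(3);
        order.setName("一加");
        order.setNum(4);
        order.setFlag(0);

        // serialize through the same DataOutput interface Hadoop uses during the shuffle
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        order.write(new DataOutputStream(buffer));

        // deserialize into a fresh object and print it
        Order copy = new Order();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(copy); // orderId=1,productId=3,name=一加,num=4
    }
}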
4、mapper
Even when a field has no meaningful value, it still has to be assigned something; otherwise, when the Order object is serialized, write() hits a null field and throws a NullPointerException (see the short sketch after the mapper code).
package com.example.hadoop.reduceJoin;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class ReducerJoinMapper extends Mapper<LongWritable, Text, Text, Order> {
    private String filename;
    private Text outK = new Text();
    private Order outV = new Order();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // remember which file (order or product) this split comes from
        FileSplit split = (FileSplit) context.getInputSplit();
        filename = split.getPath().getName();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // read one line and split it on spaces
        String line = value.toString();
        String[] split = line.split(" ");
        // decide which file the line belongs to
        if (filename.contains("order")) {
            // order record: split[1] = order id, split[2] = product id, split[3] = quantity
            // the join key is the product id
            outK.set(split[2]);
            outV.setId(Integer.valueOf(split[1]));
            outV.setProductId(Integer.valueOf(split[2]));
            outV.setNum(Integer.valueOf(split[3]));
            outV.setName("");
            outV.setFlag(0);
            context.write(outK, outV);
            return;
        }
        // product record: split[1] = product id, split[2] = product name
        outK.set(split[1]);
        outV.setId(0);
        outV.setNum(0);
        outV.setProductId(Integer.valueOf(split[1]));
        outV.setName(split[2]);
        outV.setFlag(1);
        // emit
        context.write(outK, outV);
    }
}
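To make the note above concrete, a minimal sketch (hypothetical, not part of the job) of what happens when a field is left unset:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class NullFieldSketch {
    public static void main(String[] args) throws IOException {
        Order bad = new Order();
        bad.setId(1);
        bad.setProductId(3);
        bad.setNum(4);
        // name and flag were never assigned, so they are still null
        bad.write(new DataOutputStream(new ByteArrayOutputStream()));
        // throws NullPointerException: write() reaches writeUTF(name) with name == null
        // (a null Integer would fail the same way when auto-unboxed for writeInt)
    }
}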
5、reducer
The Iterable of values can only be traversed once, and Hadoop reuses the same value object on every iteration (the reference keeps pointing at one instance whose fields change), so the fields must be copied into a fresh object (see the sketch after the reducer code).
package com.example.hadoop.reduceJoin;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.springframework.beans.BeanUtils;

import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

public class ReducerJoinReducer extends Reducer<Text, Order, Order, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<Order> values, Context context) throws IOException, InterruptedException {
        Order product = new Order();
        List<Order> list = new LinkedList<>();
        // the Iterable can only be traversed once
        for (Order value : values) {
            if (value.getFlag() == 1) {
                // copy the fields; keeping the reference would leave product
                // pointing at the reused value object
                BeanUtils.copyProperties(value, product);
                continue;
            }
            Order order = new Order();
            BeanUtils.copyProperties(value, order);
            list.add(order);
        }
        // fill in the product name on every order record and write it out
        for (Order value : list) {
            value.setName(product.getName());
            context.write(value, NullWritable.get());
        }
    }
}
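For contrast, a hypothetical version of the reduce() body showing the anti-pattern the note above warns against (keeping the reused value reference instead of copying its fields):

@Override
protected void reduce(Text key, Iterable<Order> values, Context context) {
    // Anti-pattern (do NOT do this): Hadoop hands the same Order instance to every
    // iteration of the loop and only overwrites its fields, so every element added
    // here points at one object holding the fields of the last record read.
    List<Order> broken = new LinkedList<>();
    for (Order value : values) {
        broken.add(value);
    }
}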
6、main method
package com.example.hadoop.reduceJoin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class ReducerJoinDriver {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(ReducerJoinDriver.class);
        job.setMapperClass(ReducerJoinMapper.class);
        job.setReducerClass(ReducerJoinReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Order.class);
        job.setOutputKeyClass(Order.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path("D:\\join"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\hadoop\\output11"));
        // submit the job
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
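The input and output paths are hard-coded above; a common variant (just a sketch, assuming the paths are passed in as program arguments) would replace those two lines with:

FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));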
Success.
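For reference, joining the sample data above with a single reducer should produce output roughly like this in part-r-00000 (the order of lines within one key is not guaranteed):

orderId=2,productId=1,name=小米,num=2
orderId=12,productId=2,name=华为,num=9
orderId=1,productId=3,name=一加,num=4
orderId=1,productId=3,name=一加,num=4
orderId=21,productId=4,name=荣耀,num=4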