// ===== CustomerOrder.java =====
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite bean used for a reduce-side join of customer.csv and order.csv.
 * Carries both customer fields and order fields; {@link #flag} tells the
 * reducer which source file a record came from.
 *
 * <p>Serialized via {@link #write}/{@link #readFields}; all five fields must
 * be non-null before {@link #write} is called (the mapper guarantees this;
 * a null Integer would NPE on unboxing in writeInt).
 */
public class CustomerOrder implements WritableComparable<CustomerOrder> {
    private Integer customerId;
    private String customerName;
    private Integer orderId;
    private String orderStatus;
    // Source marker: "0" for customer.csv records; order.csv records store the
    // order date here, so any non-"0" value means "order".
    private String flag;

    /** No-arg constructor required by Hadoop's Writable deserialization. */
    public CustomerOrder() {
    }

    public CustomerOrder(Integer customerId, String customerName, Integer orderId, String orderStatus, String flag) {
        this.customerId = customerId;
        this.customerName = customerName;
        this.orderId = orderId;
        this.orderStatus = orderStatus;
        this.flag = flag;
    }

    public Integer getCustomerId() {
        return customerId;
    }

    public void setCustomerId(Integer customerId) {
        this.customerId = customerId;
    }

    public String getCustomerName() {
        return customerName;
    }

    public void setCustomerName(String customerName) {
        this.customerName = customerName;
    }

    public Integer getOrderId() {
        return orderId;
    }

    public void setOrderId(Integer orderId) {
        this.orderId = orderId;
    }

    public String getOrderStatus() {
        return orderStatus;
    }

    public void setOrderStatus(String orderStatus) {
        this.orderStatus = orderStatus;
    }

    public String getFlag() {
        return flag;
    }

    public void setFlag(String flag) {
        this.flag = flag;
    }

    /** Produces the exact line format the job writes as its output key. */
    @Override
    public String toString() {
        return "CustomerOrder{" +
                "customerId=" + customerId +
                ", customerName='" + customerName + '\'' +
                ", orderId=" + orderId +
                ", orderStatus='" + orderStatus + '\'' +
                ", flag='" + flag + '\'' +
                '}';
    }

    /**
     * Orders by customerId, then orderId.
     *
     * <p>Fix: the original always returned 0, which makes every instance
     * "equal" and breaks sorting/grouping if this class is ever used as a
     * MapReduce key. Uses Integer.compare to avoid subtraction overflow.
     */
    @Override
    public int compareTo(CustomerOrder o) {
        int cmp = Integer.compare(this.customerId, o.customerId);
        return cmp != 0 ? cmp : Integer.compare(this.orderId, o.orderId);
    }

    /** Hadoop serialization; field order must match {@link #readFields}. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(customerId);
        dataOutput.writeUTF(customerName);
        dataOutput.writeInt(orderId);
        dataOutput.writeUTF(orderStatus);
        dataOutput.writeUTF(flag);
    }

    /** Hadoop deserialization; field order must match {@link #write}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.customerId = dataInput.readInt();
        this.customerName = dataInput.readUTF();
        this.orderId = dataInput.readInt();
        this.orderStatus = dataInput.readUTF();
        this.flag = dataInput.readUTF();
    }
}
// ===== JoinMapper.java =====
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.IOException;
/**
 * Map side of the join: tags each input line with its source file and emits
 * (customerId, CustomerOrder) pairs so the reducer can merge them.
 */
public class JoinMapper extends Mapper<LongWritable, Text, Text, CustomerOrder> {
    // File name of the current split, resolved once in setup().
    String name = "";
    // Reused output bean; Hadoop serializes it immediately on context.write.
    CustomerOrder customerOrder = new CustomerOrder();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Runs once per split — remember which input file this mapper reads
        // so map() can tell customer rows from order rows.
        FileSplit split = (FileSplit) context.getInputSplit();
        System.out.println(split.getPath().toString());
        name = split.getPath().getName();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] cols = value.toString().split(",");
        System.out.println("JoinMapper:" + value.toString());

        if (name.startsWith("order")) {
            // order.csv row (no customer name), e.g.: 1,2013/7/25 0:00,11599,CLOSED
            customerOrder.setCustomerId(Integer.parseInt(cols[2]));
            customerOrder.setCustomerName("");
            customerOrder.setOrderId(Integer.parseInt(cols[0]));
            customerOrder.setOrderStatus(cols[3]);
            // The order date doubles as the "this is an order" marker (non-"0").
            customerOrder.setFlag(cols[1]);
        } else {
            // customer.csv row (no order info), e.g.:
            // 1,Richard,Hernandez,XXXXXXXXX,XXXXXXXXX,6303 Heather Plaza,Brownsville,TX,78521
            customerOrder.setCustomerId(Integer.parseInt(cols[0]));
            customerOrder.setCustomerName(cols[1]);
            customerOrder.setOrderId(-1);
            customerOrder.setOrderStatus("");
            customerOrder.setFlag("0");
        }

        // Key on the customer id so both record kinds meet in one reduce call.
        context.write(new Text(customerOrder.getCustomerId().toString()), customerOrder);
    }
}
// ===== JoinPartitioner.java =====
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes records by customer-id parity: even ids to partition 0, odd to 1
 * (when the job runs with two reducers).
 */
public class JoinPartitioner extends Partitioner<Text, CustomerOrder> {
    @Override
    public int getPartition(Text text, CustomerOrder customerOrder, int i) {
        int customerId = Integer.parseInt(text.toString());
        // floorMod keeps the parity non-negative even for negative ids, and
        // the trailing "% i" bounds the result by the actual reducer count.
        // Fix: the original ignored `i` and could return 1 with a single
        // reducer, which Hadoop rejects as an illegal partition.
        return Math.floorMod(customerId, 2) % i;
    }
}
// ===== JoinReducer.java =====
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;
// Use NullWritable for the output value slot we don't need.
/**
 * Reduce side of the join: for each customer id, merges the single customer
 * record (flag "0") with all of that customer's order records, then emits one
 * joined row per order. The joined bean is the output key; the value is
 * NullWritable because everything lives in the bean's toString().
 */
public class JoinReducer extends Reducer<Text, CustomerOrder, CustomerOrder, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<CustomerOrder> values, Context context) throws IOException, InterruptedException {
        CustomerOrder customer = new CustomerOrder();
        List<CustomerOrder> orders = new ArrayList<>();

        // Hadoop reuses the same value object across iterations, so every
        // record must be copied before it is kept. Explicit setter copies
        // replace the original reflective BeanUtils.copyProperties calls,
        // which also silently swallowed copy failures via printStackTrace.
        for (CustomerOrder record : values) {
            if (record.getFlag().equals("0")) {
                // customer.csv record — at most one per key, no add needed.
                copyInto(customer, record);
            } else {
                // order.csv record — zero or more per key.
                CustomerOrder order = new CustomerOrder();
                copyInto(order, record);
                orders.add(order);
            }
        }

        // Stamp the customer's name onto each order and emit the joined rows.
        for (CustomerOrder order : orders) {
            order.setCustomerName(customer.getCustomerName());
            context.write(order, NullWritable.get());
        }
    }

    /** Field-by-field copy of {@code src} into {@code dst}. */
    private static void copyInto(CustomerOrder dst, CustomerOrder src) {
        dst.setCustomerId(src.getCustomerId());
        dst.setCustomerName(src.getCustomerName());
        dst.setOrderId(src.getOrderId());
        dst.setOrderStatus(src.getOrderStatus());
        dst.setFlag(src.getFlag());
    }
}
// ===== JoinDriver.java =====
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Job driver: wires the join mapper, parity partitioner, and join reducer,
 * clears any stale output directory, and submits the job.
 *
 * <p>Input and output paths may be supplied as args[0]/args[1]; the original
 * hard-coded development paths remain the defaults, so existing invocations
 * keep working.
 */
public class JoinDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Backward-compatible generalization of the hard-coded paths.
        String input = args.length > 0 ? args[0] : "E:\\hadoopstu\\in\\demo4";
        String output = args.length > 1 ? args[1] : "E:\\hadoopstu\\in\\out9";

        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(JoinDriver.class);

        job.setMapperClass(JoinMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(CustomerOrder.class);

        job.setReducerClass(JoinReducer.class);
        job.setOutputKeyClass(CustomerOrder.class);
        job.setOutputValueClass(NullWritable.class);

        // Two reducers to match the parity-based partitioner.
        job.setPartitionerClass(JoinPartitioner.class);
        job.setNumReduceTasks(2);

        FileInputFormat.setInputPaths(job, new Path(input));
        Path path = new Path(output);

        // Delete a leftover output directory so the job doesn't abort with
        // FileAlreadyExistsException on reruns.
        FileSystem fs = FileSystem.get(path.toUri(), configuration);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, path);

        // Fix: propagate job success/failure as the process exit code; the
        // original discarded waitForCompletion's return value.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}