Two-Table Join Application
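This post walks through a reduce-side join of a customer table and an order table in MapReduce: the mapper tags every record with the file it came from, records sharing a customer id are routed to the same reduce call, and the reducer copies the customer name onto each of that customer's orders. The sample rows below illustrate the two input formats; they are the same rows quoted in the JoinMapper comments, and the column names are assumptions inferred from how the code indexes the fields:

customer.csv  (customerId, firstName, lastName, ...)
1,Richard,Hernandez,XXXXXXXXX,XXXXXXXXX,6303 Heather Plaza,Brownsville,TX,78521

order.csv  (orderId, orderDate, customerId, orderStatus)
1,2013/7/25 0:00,11599,CLOSED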

CustomerOrder

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class CustomerOrder implements WritableComparable<CustomerOrder> {
    private Integer customerId;
    private String customerName;
    private Integer orderId;
    private String orderStatus;
    private String flag; // marks the record's source: order.csv rows carry the order date here, customer.csv rows carry "0"

    public CustomerOrder() {
    }

    public Integer getCustomerId() {
        return customerId;
    }

    public void setCustomerId(Integer customerId) {
        this.customerId = customerId;
    }

    public String getCustomerName() {
        return customerName;
    }

    public void setCustomerName(String customerName) {
        this.customerName = customerName;
    }

    public Integer getOrderId() {
        return orderId;
    }

    public void setOrderId(Integer orderId) {
        this.orderId = orderId;
    }

    public String getOrderStatus() {
        return orderStatus;
    }

    public void setOrderStatus(String orderStatus) {
        this.orderStatus = orderStatus;
    }

    public String getFlag() {
        return flag;
    }

    public void setFlag(String flag) {
        this.flag = flag;
    }

    @Override
    public String toString() {
        return "CustomerOrder{" +
                "customerId=" + customerId +
                ", customerName='" + customerName + '\'' +
                ", orderId=" + orderId +
                ", orderStatus='" + orderStatus + '\'' +
                ", flag='" + flag + '\'' +
                '}';
    }

    public CustomerOrder(Integer customerId, String customerName, Integer orderId, String orderStatus, String flag) {
        this.customerId = customerId;
        this.customerName = customerName;
        this.orderId = orderId;
        this.orderStatus = orderStatus;
        this.flag = flag;
    }

    @Override
    public int compareTo(CustomerOrder o) {
        // This class is only used as a map output value, so the ordering is never consulted
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(customerId);
        dataOutput.writeUTF(customerName);
        dataOutput.writeInt(orderId);
        dataOutput.writeUTF(orderStatus);
        dataOutput.writeUTF(flag);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        // Fields must be read back in exactly the order write() emitted them
        this.customerId = dataInput.readInt();
        this.customerName = dataInput.readUTF();
        this.orderId = dataInput.readInt();
        this.orderStatus = dataInput.readUTF();
        this.flag = dataInput.readUTF();
    }
}
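Since the whole join depends on write() and readFields() agreeing on field order, a quick local round trip is a useful sanity check. This is a minimal sketch using only java.io plus the CustomerOrder class above; the field values are taken from the sample rows:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class WritableRoundTrip {
    public static void main(String[] args) throws IOException {
        CustomerOrder original = new CustomerOrder(11599, "Richard", 1, "CLOSED", "2013/7/25 0:00");

        // Serialize with write(), as Hadoop does when shuffling map output
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance with readFields()
        CustomerOrder copy = new CustomerOrder();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        // Should print the same field values as 'original'
        System.out.println(copy);
    }
}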

JoinMapper

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class JoinMapper extends Mapper<LongWritable, Text, Text, CustomerOrder> {
    // Reused across map() calls so we don't allocate a new object per record
    String name = "";
    CustomerOrder customerOrder = new CustomerOrder();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // setup() runs once per input split, so fetch the source file name here
        FileSplit inputSplit = (FileSplit) context.getInputSplit();
        System.out.println(inputSplit.getPath().toString());
        name = inputSplit.getPath().getName();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line into comma-separated fields
        String[] fields = value.toString().split(",");
        System.out.println("JoinMapper:" + value.toString());

        // Decide which input file this record came from
        if (name.startsWith("order")) {  // order table: carries no customer name
            // e.g. 1,2013/7/25 0:00,11599,CLOSED
            customerOrder.setCustomerId(Integer.parseInt(fields[2]));
            customerOrder.setCustomerName("");
            customerOrder.setOrderId(Integer.parseInt(fields[0]));
            customerOrder.setOrderStatus(fields[3]);
            customerOrder.setFlag(fields[1]);  // the order date doubles as the "order" flag
        } else {                         // customer table: carries no order information
            // e.g. 1,Richard,Hernandez,XXXXXXXXX,XXXXXXXXX,6303 Heather Plaza,Brownsville,TX,78521
            customerOrder.setCustomerId(Integer.parseInt(fields[0]));
            customerOrder.setCustomerName(fields[1]);
            customerOrder.setOrderId(-1);
            customerOrder.setOrderStatus("");
            customerOrder.setFlag("0");        // "0" marks a customer record
        }
        // Key by customer id so matching records meet in the same reduce call
        Text text = new Text(customerOrder.getCustomerId().toString());
        context.write(text, customerOrder);
    }
}
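Tracing map() over the two sample lines (worked through by hand from the logic above, not an actual run) gives one key/value pair per line, keyed by customer id:

key "11599" -> CustomerOrder{customerId=11599, customerName='', orderId=1, orderStatus='CLOSED', flag='2013/7/25 0:00'}
key "1"     -> CustomerOrder{customerId=1, customerName='Richard', orderId=-1, orderStatus='', flag='0'}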

JoinPartitioner

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class JoinPartitioner extends Partitioner<Text, CustomerOrder> {
    @Override
    public int getPartition(Text text, CustomerOrder customerOrder, int numPartitions) {
        int customerId = Integer.parseInt(text.toString());
        // Route even customer ids to partition 0 and odd ones to partition 1;
        // this assumes the job is configured with exactly two reduce tasks
        return (customerId % 2 == 0) ? 0 : 1;
    }
}
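Traced on the sample keys (the even id is hypothetical, added only to show the other branch):

getPartition("1", ...)     -> 1 is odd     -> partition 1
getPartition("11599", ...) -> 11599 is odd -> partition 1
getPartition("2", ...)     -> 2 is even    -> partition 0

Because the partition numbers 0 and 1 are hard-coded, the driver must set numReduceTasks to exactly 2.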

JoinReducer

import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;

// Declare the unused half of the output pair as NullWritable
public class JoinReducer extends Reducer<Text, CustomerOrder, CustomerOrder, NullWritable> {

    @Override
    protected void reduce(Text key, Iterable<CustomerOrder> values, Context context) throws IOException, InterruptedException {
        // One customer bean plus the list of that customer's orders for this key
        CustomerOrder customer = new CustomerOrder();
        List<CustomerOrder> orders = new ArrayList<>();

        // Hadoop reuses the same value object while iterating, so each record
        // must be copied out before the next iteration overwrites it
        for (CustomerOrder customerOrder : values) {
            try {
                if (customerOrder.getFlag().equals("0")) {  // customer record: at most one per key
                    BeanUtils.copyProperties(customer, customerOrder);
                } else {                                    // any other flag marks an order record
                    CustomerOrder order = new CustomerOrder();
                    BeanUtils.copyProperties(order, customerOrder);
                    orders.add(order);
                }
            } catch (IllegalAccessException | InvocationTargetException e) {
                e.printStackTrace();
            }
        }

        // Copy the customer name onto every order and emit the joined record
        for (CustomerOrder order : orders) {
            order.setCustomerName(customer.getCustomerName());
            context.write(order, NullWritable.get());
        }
    }
}
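Two edge cases fall out of this reduce logic: a key with only a customer record emits nothing (the join is effectively inner on the order side), and a key with orders but no customer record leaves customerName as null, which would make write() throw a NullPointerException from writeUTF(null). The code therefore assumes every order references a customer id present in customer.csv.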

JoinDriver

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class JoinDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        job.setJarByClass(JoinDriver.class);

        job.setMapperClass(JoinMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(CustomerOrder.class);

        job.setReducerClass(JoinReducer.class);
        job.setOutputKeyClass(CustomerOrder.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(JoinPartitioner.class);
        job.setNumReduceTasks(2);  // must match the two partitions JoinPartitioner produces

        FileInputFormat.setInputPaths(job, new Path("E:\\hadoopstu\\in\\demo4"));
        Path path = new Path("E:\\hadoopstu\\in\\out9");
        // Delete the output directory if it already exists; otherwise the job fails on startup
        FileSystem fs = FileSystem.get(path.toUri(), configuration);
        if (fs.exists(path))
            fs.delete(path, true);
        FileOutputFormat.setOutputPath(job, path);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
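With two reduce tasks, the job writes one output file per partition under out9. Roughly what to expect (file names follow standard Hadoop output naming; the customer name shown is hypothetical, assuming customer.csv contains a row for id 11599):

E:\hadoopstu\in\out9\part-r-00000   <- even customer ids
E:\hadoopstu\in\out9\part-r-00001   <- odd customer ids, e.g.:
CustomerOrder{customerId=11599, customerName='Mary', orderId=1, orderStatus='CLOSED', flag='2013/7/25 0:00'}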
