Handling Joins in Hadoop MapReduce

The idea and code for a reduce-side join.
Suppose there are two tables that need to be joined, for example:

User table (uid, name, age, gender, friends):

```
u001,senge,18,male,angelababy
u002,58,male,ruhua
u003,shuaishuai,16,female,chunge
u004,laoyang,28,female,zengge
u005,nana,24,female,huangbo
u006,dingding,19,male,taojiji
```

Order table (oid, uid):

```
order001,u006
order002,u006
order003,u005
order004,u006
order005,u003
order006,u002
order011,u001
order012,u001
order033,u005
order034,u002
order055,u003
order066,u004
```
Looking at the two tables first: to join them we have to go through the uid field they share.

In MapReduce, the Map phase emits the uid as the key and a serialized JoinBean as the value. If we used the uid alone, with nothing to mark the values, the reducer could not tell which table a record came from, and records from the order table might end up being stitched to each other. So every value is also tagged with its table name to keep the two sides apart.

The setup() method runs once before map() is called; that is where we find out which file this map task is reading:
```java
FileSplit fileSplit = (FileSplit) context.getInputSplit();
fsname = fileSplit.getPath().getName();
```
With the file (table) name in hand, map() uses the uid as the key and the tagged JoinBean as the value; note the bean must implement Writable so it can be serialized. In the Reduce phase, the values grouped under one uid are separated again by table name.

The key point: the reduce-side iterator keeps handing back the same object, so when collecting values with list.add() we must copy each one into a newly created object; otherwise the list ends up holding nothing but the last record's data. A minimal sketch of this pitfall follows.
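For illustration, here is a minimal, Hadoop-free sketch of that pitfall. The ReuseDemo and Bean classes are made up for this example, but the single reused instance mimics what the value iterator in reduce() does under the hood:

```java
import java.util.ArrayList;
import java.util.List;

public class ReuseDemo {
    static class Bean {
        String oid;
        Bean copyOf() { Bean b = new Bean(); b.oid = this.oid; return b; }
    }

    public static void main(String[] args) {
        // Simulate Hadoop's value iterator: one Bean instance is refilled for each record.
        Bean reused = new Bean();
        String[] records = {"order001", "order002", "order003"};

        List<Bean> wrong = new ArrayList<>();
        List<Bean> right = new ArrayList<>();
        for (String r : records) {
            reused.oid = r;               // same object, new content
            wrong.add(reused);            // WRONG: three references to one object
            right.add(reused.copyOf());   // RIGHT: a fresh copy per record
        }

        System.out.println(wrong.get(0).oid); // order003 -- the earlier data is gone
        System.out.println(right.get(0).oid); // order001
    }
}
```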

The utility method BeanUtils.copyProperties(newObj, oldObj) takes the destination (new) object first and the source (old) object second, and copies the old object's property values into the new one.
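A minimal usage sketch (the User class here is hypothetical; copyProperties only needs standard getters and setters to reflect on, and commons-beanutils must be on the classpath):

```java
import org.apache.commons.beanutils.BeanUtils;

public class CopyDemo {
    public static class User {
        private String uid;
        private int age;
        public String getUid() { return uid; }
        public void setUid(String uid) { this.uid = uid; }
        public int getAge() { return age; }
        public void setAge(int age) { this.age = age; }
    }

    public static void main(String[] args) throws Exception {
        User old = new User();
        old.setUid("u001");
        old.setAge(18);

        User fresh = new User();
        // copyProperties(dest, orig): values flow from the second argument into the first
        BeanUtils.copyProperties(fresh, old);

        System.out.println(fresh.getUid() + "," + fresh.getAge()); // u001,18
    }
}
```

The complete code follows: first the serializable JoinBean2, then the Mapper, Reducer, and driver.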


```java
package com._yzq55iou.Join;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class JoinBean2 implements Writable {
    private String oid;
    private String uid;
    private String name;
    private int age;
    private String friends;
    private String gender;
    private String tableName;

    public void set(String oid, String uid, String name, int age, String friends, String gender, String tableName) {
        this.oid = oid;
        this.uid = uid;
        this.name = name;
        this.age = age;
        this.friends = friends;
        this.gender = gender;
        this.tableName = tableName;
    }

    public JoinBean2() {
    }

    public String getOid() {
        return oid;
    }

    public void setOid(String oid) {
        this.oid = oid;
    }

    public String getUid() {
        return uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public String getFriends() {
        return friends;
    }

    public void setFriends(String friends) {
        this.friends = friends;
    }

    public String getGender() {
        return gender;
    }

    public void setGender(String gender) {
        this.gender = gender;
    }

    public String getTableName() {
        return tableName;
    }

    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    @Override
    public String toString() {
        return "JoinBean2{" +
                "oid='" + oid + '\'' +
                ", uid='" + uid + '\'' +
                ", name='" + name + '\'' +
                ", age=" + age +
                ", friends='" + friends + '\'' +
                ", gender='" + gender + '\'' +
                ", tableName='" + tableName + '\'' +
                '}';
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        // Serialize the fields in a fixed order; readFields() must read them back in exactly the same order
        dataOutput.writeUTF(oid);
        dataOutput.writeUTF(uid);
        dataOutput.writeUTF(name);
        dataOutput.writeInt(age);
        dataOutput.writeUTF(friends);
        dataOutput.writeUTF(gender);
        dataOutput.writeUTF(tableName);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.oid=dataInput.readUTF();
        this.uid=dataInput.readUTF();
        this.name=dataInput.readUTF();
        this.age=dataInput.readInt();
        this.friends=dataInput.readUTF();
        this.gender=dataInput.readUTF();
        this.tableName=dataInput.readUTF();
    }
}
```

The Mapper, Reducer, and driver class:

```java
package com._yzq55iou.Join;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;

public class JoinUAndO {

static class JoinUAndOMapper extends Mapper<LongWritable, Text, Text, JoinBean2> {
    // Reused output key/value objects so map() does not allocate per record
    Text k = new Text();
    JoinBean2 bean = new JoinBean2();
    // Name of the input file this map task is reading; set once in setup()
    String fsname = null;
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Record which input file (user file vs order file) this split comes from
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        fsname = fileSplit.getPath().getName();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] split = value.toString().split(",");

        // Order records look like: order001,u006 -> fill the order fields, pad user fields with a placeholder
        if (fsname.startsWith("order")) {
            bean.setOid(split[0]);
            bean.setUid(split[1]);
            bean.setName("\001");
            bean.setTableName("order");
            bean.setAge(-1);
            bean.setGender("\001");
            bean.setFriends("\001");
        } else {
            // User records look like: u001,senge,18,male,angelababy
            bean.setUid(split[0]);
            bean.setName(split[1]);
            bean.setAge(Integer.parseInt(split[2]));
            bean.setGender(split[3]);
            bean.setFriends(split[4]);
            bean.setTableName("user");
            bean.setOid("\001");
        }
        // The shared uid is the join key
        k.set(bean.getUid());
        context.write(k, bean);
    }
}
static class JoinUAndOReduce extends Reducer<Text, JoinBean2, JoinBean2, NullWritable> {

    @Override
    protected void reduce(Text key, Iterable<JoinBean2> values, Context context) throws IOException, InterruptedException {
        try {
            // The single user record for this uid; every order record goes into the list
            JoinBean2 users = new JoinBean2();
            ArrayList<JoinBean2> orderlist = new ArrayList<>();
            for (JoinBean2 us : values) {
                String tableName = us.getTableName();
                if (tableName.equals("order")) {
                    // Hadoop reuses the `us` instance, so copy it into a brand-new object before adding it
                    JoinBean2 or = new JoinBean2();
                    BeanUtils.copyProperties(or, us);
                    orderlist.add(or);
                } else {
                    BeanUtils.copyProperties(users, us);
                }
            }
            // Stitch the user's fields onto every order for this uid and emit the joined record
            for (JoinBean2 bean2 : orderlist) {
                bean2.setName(users.getName());
                bean2.setAge(users.getAge());
                bean2.setFriends(users.getFriends());
                bean2.setGender(users.getGender());
                context.write(bean2, NullWritable.get());
            }
        } catch (Exception e) {
            // Do not swallow reflection/copy errors silently
            e.printStackTrace();
        }
    }
}

public static void main(String[] args) throws  Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    job.setMapperClass(JoinUAndOMapper.class);
    job.setReducerClass(JoinUAndOReduce.class);

    // Map output: uid -> tagged JoinBean2
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(JoinBean2.class);

    // Final output: the joined JoinBean2 (written via its toString), with no value
    job.setOutputKeyClass(JoinBean2.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.setInputPaths(job, new Path("E:\\anliwenjianjia\\mrdata\\join\\input"));
    FileOutputFormat.setOutputPath(job, new Path("E:\\anliwenjianjia\\mrdata\\join\\output2"));

    job.waitForCompletion(true);

}

}
```
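If the job is run over the sample data above (with the order file named so that it starts with "order"), each order should come out joined with its user's fields, one line per order, formatted by JoinBean2.toString(). For example, the line for order011 should look roughly like:

```
JoinBean2{oid='order011', uid='u001', name='senge', age=18, friends='angelababy', gender='male', tableName='order'}
```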