MapReduce——join算法的代码实现(reduce端join)

需求:有user数据文件:user.csv

u001,senge,18,angelababy

u002,laozhao,48,ruhua

u003,xiaoxu,16,chunge

u004,laoyang,28,zengge

u005,nana,14,huangbo

有订单数据文件:order.dat.1  order.dat.2   order.dat.3

order001,u001

order002,u001

order003,u005

order004,u002

order005,u003

order006,u004

需要对上述两类数据进行连接:

order001,u001,senge,18,angelababy

order002,u001,senge,18,angelababy

order003,u005,nana,14,huangbo

order004,u002,laozhao,48,ruhua

order005,u003,xiaoxu,16,chunge

order006,u004,laoyang,28,zengge

思路:

map端:

不管worker读到的是什么文件,map端都可以通过context拿到当前输入切片(FileSplit)所属的文件名,从而区分读到的是order数据还是user数据

对于order数据,map中切字段,封装为一个JoinBean,打标记:order(存放在tableName字段中)

对于user数据,map中切字段,封装为一个JoinBean,打标记:user(存放在tableName字段中)

然后,以uid作为key,以joinbean作为value返回

reduce端:

用迭代器迭代出一组相同uid的所有数据joinbean,然后判断

如果标记字段为order,则复制一份加入一个ArrayList中

如果标记字段为user,则复制到一个单独的JoinBean对象(userBean)中

然后,遍历arraylist,对里面的每一个JoinBean填充userBean中的user数据,然后输出这个joinBean即可

代码实现

JoinBean

public class JoinBean implements Writable {

private String orderId;

private String userId;

private String userName;

private int userAge;

private String userFriend;

private String tableName;

public void set(String orderId, String userId, String userName, int userAge, String userFriend,String tableName) {

this.orderId = orderId;

this.userId = userId;

this.userName = userName;

this.userAge = userAge;

this.userFriend = userFriend;

this.tableName=tableName;

}

public String getOrderId() {

return orderId;

}

public void setOrderId(String orderId) {

this.orderId = orderId;

}

public String getUserId() {

return userId;

}

public void setUserId(String userId) {

this.userId = userId;

}

public String getUserName() {

return userName;

}

public void setUserName(String userName) {

this.userName = userName;

}

public int getUserAge() {

return userAge;

}

public void setUserAge(int userAge) {

this.userAge = userAge;

}

public String getUserFriend() {

return userFriend;

}

public void setUserFriend(String userFriend) {

this.userFriend = userFriend;

}

public String getTableName() {

return tableName;

}

public void setTableName(String tableName) {

this.tableName = tableName;

}

@Override

public String toString() {

return this.orderId+","+this.userId+","+this.userAge

+","+this.userName+","+this.userFriend;

}

public void write(DataOutput dataOutput) throws IOException {

dataOutput.writeUTF(this.orderId);

dataOutput.writeUTF(this.userId);

dataOutput.writeUTF(this.userName);

dataOutput.writeInt(this.userAge);

dataOutput.writeUTF(this.userFriend);

dataOutput.writeUTF(this.tableName);

}

public void readFields(DataInput dataInput) throws IOException {

this.orderId=dataInput.readUTF();

this.userId=dataInput.readUTF();

this.userName=dataInput.readUTF();

this.userAge=dataInput.readInt();

this.userFriend=dataInput.readUTF();

this.tableName=dataInput.readUTF();

}

}

ReduceSideJoin

/**
 * Reduce-side join of order records with user records, keyed by user id.
 *
 * <p>The mapper tags every record with the kind of file it came from and emits
 * it under its user id. The reducer then receives all records for one user id
 * together and writes one joined record per order.
 */
public class ReduceSideJoin {

    /** Counter group used for records this job has to skip. */
    private static final String COUNTER_GROUP = "ReduceSideJoin";

    /** Input directory used when none is given on the command line. */
    private static final String DEFAULT_INPUT_DIR = "F:\\mrdata\\join\\input";

    /** Output directory used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "F:\\mrdata\\join\\out";

    /**
     * Parses one comma-separated input line into a {@link JoinBean} and emits
     * it keyed by user id.
     */
    public static class ReduceSideJoinMapper extends Mapper<LongWritable, Text, Text, JoinBean> {

        /** Name of the file backing the current input split; set in {@link #setup}. */
        private String fileName = null;

        // Output objects are reused for every record written by this mapper.
        private final JoinBean bean = new JoinBean();
        private final Text k = new Text();

        /**
         * Records the name of the file this split belongs to, so that
         * {@code map} can tell order lines from user lines.
         *
         * @param context task context providing the input split
         * @throws IOException          declared by the overridden method
         * @throws InterruptedException declared by the overridden method
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit inputSplit = (FileSplit) context.getInputSplit();
            fileName = inputSplit.getPath().getName();
        }

        /**
         * Emits {@code (userId, bean)} for one input line.
         *
         * <p>Files whose name starts with {@code "order"} are parsed as
         * {@code orderId,userId}; every other file is parsed as
         * {@code userId,userName,userAge,userFriend}. Fields that do not apply
         * are filled with the placeholders {@code "NULL"} and {@code -1}.
         * Lines with too few fields (including blank lines) are skipped and
         * counted under {@code MALFORMED_RECORDS}. A non-numeric age still
         * fails the task with {@link NumberFormatException}.
         *
         * @param key     position of the line in the input (unused)
         * @param value   the input line
         * @param context task context used to emit the record
         * @throws IOException          if emitting the record fails
         * @throws InterruptedException if the task is interrupted while emitting
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            boolean isOrder = fileName.startsWith("order");
            int requiredFields = isOrder ? 2 : 4;

            if (fields.length < requiredFields) {
                context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
                return;
            }

            if (isOrder) {
                bean.set(fields[0], fields[1], "NULL", -1, "NULL", "order");
            } else {
                bean.set("NULL", fields[0], fields[1], Integer.parseInt(fields[2]), fields[3], "user");
            }

            k.set(bean.getUserId());
            context.write(k, bean);
        }
    }

    /**
     * Combines the user record of one user id with each of that user's orders.
     */
    public static class ReducerSideJoinReduce extends Reducer<Text, JoinBean, JoinBean, NullWritable> {

        /**
         * Writes one joined bean per order of the given user id.
         *
         * <p>If no user record arrived for this key, nothing is written and
         * the orders are counted under {@code ORDERS_WITHOUT_USER}. If several
         * user records arrive, the last one iterated is used.
         *
         * @param key     the user id shared by all values
         * @param beans   order and user records for this user id
         * @param context task context used to emit the joined records
         * @throws IOException          if emitting a record fails
         * @throws InterruptedException if the task is interrupted while emitting
         */
        @Override
        protected void reduce(Text key, Iterable<JoinBean> beans, Context context)
                throws IOException, InterruptedException {
            ArrayList<JoinBean> orderList = new ArrayList<>();
            JoinBean userBean = null;

            // Separate the two kinds of record. Each bean is copied before it
            // is kept, because Hadoop reuses the value object handed out by
            // the iterator.
            for (JoinBean bean : beans) {
                if ("order".equals(bean.getTableName())) {
                    orderList.add(copyOf(bean));
                } else {
                    userBean = copyOf(bean);
                }
            }

            if (userBean == null) {
                // Inner join: orders without a matching user produce no output.
                context.getCounter(COUNTER_GROUP, "ORDERS_WITHOUT_USER").increment(orderList.size());
                return;
            }

            // Fill the user columns into every order and emit the result.
            for (JoinBean order : orderList) {
                order.setUserAge(userBean.getUserAge());
                order.setUserFriend(userBean.getUserFriend());
                order.setUserName(userBean.getUserName());
                context.write(order, NullWritable.get());
            }
        }

        /** Returns a new bean holding the same six field values as {@code source}. */
        private static JoinBean copyOf(JoinBean source) {
            JoinBean copy = new JoinBean();
            copy.set(source.getOrderId(), source.getUserId(), source.getUserName(),
                    source.getUserAge(), source.getUserFriend(), source.getTableName());
            return copy;
        }
    }

    /**
     * Configures and runs the join job, then exits with {@code 0} on success
     * and {@code -1} on failure.
     *
     * @param args optional: {@code args[0]} is the input directory and
     *             {@code args[1]} the output directory; missing values fall
     *             back to the built-in default paths
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        String inputDir = args.length > 0 ? args[0] : DEFAULT_INPUT_DIR;
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 1. Locate the job jar from this class.
        job.setJarByClass(ReduceSideJoin.class);

        // 2. Mapper and reducer implementations used by this job.
        job.setMapperClass(ReduceSideJoinMapper.class);
        job.setReducerClass(ReducerSideJoinReduce.class);

        // 3. Key/value types produced by the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(JoinBean.class);

        // 4. Key/value types produced by the reducer.
        job.setOutputKeyClass(JoinBean.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(inputDir));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : -1);
    }
}

测试数据(代码只按文件名区分两类数据:文件名以order开头的当作订单文件,其余文件当作用户文件,所以这里的文件名与上文的order.dat.*、user.csv不同也不影响结果)

order.txt.1

order001,u001

order002,u001

order003,u005

order004,u002

order005,u003

order006,u004

order.txt.2

order001,u001

order002,u001

order003,u005

order004,u002

order005,u003

order006,u004

user.txt

u001,senge,18,angelababy

u002,laozhao,48,ruhua

u003,xiaoxu,16,chunge

u004,laoyang,28,zengge

u005,nana,14,huangbo

结果输出(order.txt.1与order.txt.2内容相同,所以每条订单各输出两次)

order002,u001,18,senge,angelababy

order001,u001,18,senge,angelababy

order002,u001,18,senge,angelababy

order001,u001,18,senge,angelababy

order004,u002,48,laozhao,ruhua

order004,u002,48,laozhao,ruhua

order005,u003,16,xiaoxu,chunge

order005,u003,16,xiaoxu,chunge

order006,u004,28,laoyang,zengge

order006,u004,28,laoyang,zengge

order003,u005,14,nana,huangbo

order003,u005,14,nana,huangbo

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值