Joining a customers table with an orders table (e.g., both exported from MySQL) to pair each customer with their orders. When both sides are large tables, neither fits in memory for a map-side join, so each input is read normally by the mappers and the join happens on the reduce side.

Reduce-side join
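
Before the code, here is a minimal sketch of the input this walkthrough assumes. The exact columns are hypothetical; the only layout the Mapper below actually relies on is that a customer line starts with the cid and an order line ends with the cid:

customers.txt (cid,name,age):

1,tom,12
2,tony,13

orders.txt (oid,price,cid):

1,12.50,1
2,13.00,1
3,5.00,2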

1. Define the custom composite key

package com.mine.hdfs.mr.mapjoin.reducejoin;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Composite key carrying either a customer record (type = 0)
 * or an order record (type = 1).
 */
public class ComboKey2 implements WritableComparable<ComboKey2> {
    // 0 = customer, 1 = order
    private int type;
    private int cid;
    private int oid;
    private String customerInfo = "";
    private String orderInfo = "";

    public int getType() { return type; }
    public void setType(int type) { this.type = type; }
    public int getCid() { return cid; }
    public void setCid(int cid) { this.cid = cid; }
    public int getOid() { return oid; }
    public void setOid(int oid) { this.oid = oid; }
    public String getCustomerInfo() { return customerInfo; }
    public void setCustomerInfo(String customerInfo) { this.customerInfo = customerInfo; }
    public String getOrderInfo() { return orderInfo; }
    public void setOrderInfo(String orderInfo) { this.orderInfo = orderInfo; }

    public int compareTo(ComboKey2 o) {
        // Records of the same customer?
        if (cid == o.cid) {
            if (type == o.type) {
                // Two orders of the same customer: sort by order id.
                return oid - o.oid;
            } else {
                // A customer record sorts before its orders.
                return type == 0 ? -1 : 1;
            }
        } else {
            // Different customers: sort by customer id.
            return cid - o.cid;
        }
    }

    public void write(DataOutput out) throws IOException {
        out.writeInt(type);
        out.writeInt(cid);
        out.writeInt(oid);
        out.writeUTF(customerInfo);
        out.writeUTF(orderInfo);
    }

    public void readFields(DataInput in) throws IOException {
        this.type = in.readInt();
        this.cid = in.readInt();
        this.oid = in.readInt();
        this.customerInfo = in.readUTF();
        this.orderInfo = in.readUTF();
    }
}
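
A quick sanity check of the ordering (ComboKey2SortDemo is a hypothetical helper, not part of the job): a customer record compares as less than any order record with the same cid, so within a group the customer always arrives first.

package com.mine.hdfs.mr.mapjoin.reducejoin;

public class ComboKey2SortDemo {
    public static void main(String[] args) {
        ComboKey2 customer = new ComboKey2();
        customer.setType(0);
        customer.setCid(1);

        ComboKey2 order = new ComboKey2();
        order.setType(1);
        order.setCid(1);
        order.setOid(7);

        // Prints a negative number: the customer record sorts
        // ahead of any of its orders within the same cid group.
        System.out.println(customer.compareTo(order));
    }
}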

2. Define the custom partitioner

package com.mine.hdfs.mr.mapjoin.reducejoin;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partitions by customer id only, so a customer record and all of
 * its orders land in the same reducer.
 */
public class CIDPartitioner extends Partitioner<ComboKey2, NullWritable> {
    public int getPartition(ComboKey2 key, NullWritable nullWritable, int numPartitions) {
        return key.getCid() % numPartitions;
    }
}
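
Partitioning on cid alone is what makes the join possible: a customer record and every one of its orders hash to the same reducer regardless of type or oid. Note this sketch assumes non-negative customer ids; if cids could be negative, (key.getCid() & Integer.MAX_VALUE) % numPartitions would be the safer expression.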

3. Create the Mapper

package com.mine.hdfs.mr.mapjoin.reducejoin;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * Mapper: tags each input line as a customer (type = 0) or an order
 * (type = 1) based on the file it came from, and emits a ComboKey2.
 */
public class ReduceJoinMapper extends Mapper<LongWritable, Text, ComboKey2, NullWritable> {

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();

        // Decide whether this line is a customer or an order
        // by inspecting the path of the input split.
        FileSplit split = (FileSplit) context.getInputSplit();
        String path = split.getPath().toString();

        ComboKey2 key2 = new ComboKey2();
        // Customer line: "cid,..." -- keep the whole line as customer info.
        if (path.contains("customers")) {
            String cid = line.substring(0, line.indexOf(","));
            key2.setType(0);
            key2.setCid(Integer.parseInt(cid));
            key2.setCustomerInfo(line);
        }
        // Order line: "oid,...,cid" -- cid is the last field;
        // order info is the line without the trailing cid.
        else {
            String cid = line.substring(line.lastIndexOf(",") + 1);
            String oid = line.substring(0, line.indexOf(","));
            String oinfo = line.substring(0, line.lastIndexOf(","));
            key2.setType(1);
            key2.setCid(Integer.parseInt(cid));
            key2.setOid(Integer.parseInt(oid));
            key2.setOrderInfo(oinfo);
        }
        context.write(key2, NullWritable.get());
    }
}
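
For the sample input above, this mapper emits keys such as (type=0, cid=1, customerInfo="1,tom,12") and (type=1, cid=1, oid=1, orderInfo="1,12.50"), each paired with a NullWritable value: everything the join needs travels inside the key itself.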

4. Create the Reducer

package com.mine.hdfs.mr.mapjoin.reducejoin;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.Iterator;

/**
 * ReduceJoinReducer: the reduce-side join itself. Thanks to the
 * grouping comparator, one reduce() call receives all records of one
 * customer, sorted so the customer record comes first, then its orders.
 */
public class ReduceJoinReducer extends Reducer<ComboKey2, NullWritable, Text, NullWritable> {

    protected void reduce(ComboKey2 key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        Iterator<NullWritable> it = values.iterator();
        // Advance to the first record of the group: the customer.
        // Hadoop reuses the key object, so its fields change as the
        // iterator advances -- capture the customer info now.
        it.next();
        String cinfo = key.getCustomerInfo();
        // Each remaining record is one of this customer's orders;
        // join it with the captured customer info.
        while (it.hasNext()) {
            it.next();
            String oinfo = key.getOrderInfo();
            context.write(new Text(cinfo + "," + oinfo), NullWritable.get());
        }
    }
}
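
With the sample input above, the joined output would look like the following, spread across the part files according to cid % numPartitions:

1,tom,12,1,12.50
1,tom,12,2,13.00
2,tony,13,3,5.00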

5. Create the sort comparator

package com.mine.hdfs.mr.mapjoin.reducejoin;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator for the composite key: delegates to
 * ComboKey2.compareTo().
 */
public class ComboKey2Comparator extends WritableComparator {

    protected ComboKey2Comparator() {
        super(ComboKey2.class, true);
    }

    public int compare(WritableComparable a, WritableComparable b) {
        ComboKey2 k1 = (ComboKey2) a;
        ComboKey2 k2 = (ComboKey2) b;
        return k1.compareTo(k2);
    }
}

6. Create the grouping comparator

package com.mine.hdfs.mr.mapjoin.reducejoin;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: compares by cid only, so a customer record and
 * all of its orders are grouped into a single reduce() call.
 */
public class CIDGroupComparator extends WritableComparator {

    protected CIDGroupComparator() {
        super(ComboKey2.class, true);
    }

    public int compare(WritableComparable a, WritableComparable b) {
        ComboKey2 k1 = (ComboKey2) a;
        ComboKey2 k2 = (ComboKey2) b;
        return Integer.compare(k1.getCid(), k2.getCid());
    }
}
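
The three pieces cooperate: the partitioner sends all records of one cid to the same reducer, the sort comparator orders them customer-first-then-orders-by-oid, and this grouping comparator folds them into a single reduce() call. Remove any one and the join breaks; without the grouping comparator, for instance, every distinct ComboKey2 would get its own reduce() call and the customer info would never meet its orders.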

7. Create the driver (App)

package com.mine.hdfs.mr.mapjoin.reducejoin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver: wires the custom key, partitioner, comparators, mapper and
 * reducer together.
 */
public class ReduceJoinApp {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");           // run against the local file system
        Job job = Job.getInstance(conf);

        // Job properties
        job.setJobName("ReduceJoinApp");                // job name
        job.setJarByClass(ReduceJoinApp.class);         // jar search class

        // Input and output paths (the output directory must not exist yet)
        FileInputFormat.addInputPath(job, new Path("D:\\mr\\reducejoin"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\mr\\reducejoin\\out"));

        job.setMapperClass(ReduceJoinMapper.class);     // mapper class
        job.setReducerClass(ReduceJoinReducer.class);   // reducer class

        // Map output types
        job.setMapOutputKeyClass(ComboKey2.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Reduce output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Partitioner: route by cid
        job.setPartitionerClass(CIDPartitioner.class);
        // Grouping comparator: group by cid
        job.setGroupingComparatorClass(CIDGroupComparator.class);
        // Sort comparator: customer first, then its orders by oid
        job.setSortComparatorClass(ComboKey2Comparator.class);

        job.setNumReduceTasks(2);                       // number of reducers
        job.waitForCompletion(true);
    }
}
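
To try this locally, place customers.txt and orders.txt under D:\mr\reducejoin and make sure D:\mr\reducejoin\out does not exist, since Hadoop refuses to write into an existing output directory (delete it between runs). With setNumReduceTasks(2), the joined lines land in part-r-00000 and part-r-00001, split by cid % 2.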
