转载地址:http://blog.csdn.net/tianjun2012/article/details/63747122
一、需求
1、如下订单表:id、date、pid、amount
1001,20160710,P0001,2
1002,20160710,P0001,3
1002,20170710,P0002,3
1001,20160710,P0001,2
1002,20140710,P0003,3
1003,20150710,P0002,3
- 1
- 2
- 3
- 4
- 5
- 6
2、如下商品信息表:id,pname,category_id,price
P0001,xiaomi5,1000,2
P0002,chuiziT1,1000,3
P0003,meizu,1001,3
- 1
- 2
- 3
其中订单表中的pid对应商品列表中的id。
如果进行联表查询所有的详细信息,想想,如果是在sql中就非常简单了:select a.id,a.date,b.name,b.category_id,b.price from t_order a join t_product b on a.pid = b.id
3、思路:
如果利用mapreduce进行处理,其实我们可以想象,
在maptask后,同样的key被归为一组,传递给reduceTask来进行处理的,这时我如果把两个表以同一个key来map,所以最后在,reduce的时候合并起来就肯定是订单和商品的详细信息了。
二、代码实现
1、首先定义一个bean包含所有的订单信息和商品信息,同时还得有一个flag来区分这个bean到底是订单,还是商品:
InfoBean1.java
package join;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
*
* Created by tianjun on 2017/3/17.
*/
/**
 * Combined bean holding both order fields and product fields for a
 * reduce-side join. Exactly one "side" of the bean is populated at a time;
 * {@code flag} tells which one:
 * <ul>
 *   <li>{@code flag == "0"} — an order record (order_id, dateString, p_id, amount)</li>
 *   <li>{@code flag == "1"} — a product record (p_id, pname, category_id, price)</li>
 * </ul>
 * The unused side is filled with zero / empty-string placeholders by the
 * mapper. Field order in {@link #write} and {@link #readFields} MUST stay
 * identical — Hadoop deserializes by position, not by name.
 */
public class InfoBean1 implements Writable {

    // --- order side (flag == "0") ---
    private int order_id;
    private String dateString;
    private String p_id;      // join key; also set on product records
    private int amount;

    // --- product side (flag == "1") ---
    private String pname;
    private int category_id;
    private float price;

    // "0" = order record, "1" = product record
    private String flag;

    /** No-arg constructor required by Hadoop's reflective instantiation. */
    public InfoBean1() {
    }

    public InfoBean1(int order_id, String dateString, String p_id, int amount,
                     String pname, int category_id, float price, String flag) {
        this.order_id = order_id;
        this.dateString = dateString;
        this.p_id = p_id;
        this.amount = amount;
        this.pname = pname;
        this.category_id = category_id;
        this.price = price;
        this.flag = flag;
    }

    /**
     * Bulk setter so the mapper can reuse one bean instance per task
     * instead of allocating a new one for every input record.
     */
    public void set(int order_id, String dateString, String p_id, int amount,
                    String pname, int category_id, float price, String flag) {
        this.order_id = order_id;
        this.dateString = dateString;
        this.p_id = p_id;
        this.amount = amount;
        this.pname = pname;
        this.category_id = category_id;
        this.price = price;
        this.flag = flag;
    }

    public int getOrder_id() {
        return order_id;
    }

    public void setOrder_id(int order_id) {
        this.order_id = order_id;
    }

    public String getDateString() {
        return dateString;
    }

    public void setDateString(String dateString) {
        this.dateString = dateString;
    }

    public String getP_id() {
        return p_id;
    }

    public void setP_id(String p_id) {
        this.p_id = p_id;
    }

    public int getAmount() {
        return amount;
    }

    public void setAmount(int amount) {
        this.amount = amount;
    }

    public String getPname() {
        return pname;
    }

    public void setPname(String pname) {
        this.pname = pname;
    }

    public int getCategory_id() {
        return category_id;
    }

    public void setCategory_id(int category_id) {
        this.category_id = category_id;
    }

    public float getPrice() {
        return price;
    }

    public void setPrice(float price) {
        this.price = price;
    }

    public String getFlag() {
        return flag;
    }

    public void setFlag(String flag) {
        this.flag = flag;
    }

    /** Guards against NPE: DataOutput.writeUTF(null) throws. */
    private static String nullToEmpty(String s) {
        return s == null ? "" : s;
    }

    /**
     * Serializes all fields in a fixed order. String fields are written
     * null-safe because a bean created via the no-arg constructor (as Hadoop
     * does reflectively) has null Strings, and writeUTF(null) would throw NPE.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(order_id);
        out.writeUTF(nullToEmpty(dateString));
        out.writeUTF(nullToEmpty(p_id));
        out.writeInt(amount);
        out.writeUTF(nullToEmpty(pname));
        out.writeInt(category_id);
        out.writeFloat(price);
        out.writeUTF(nullToEmpty(flag));
    }

    /** Deserializes fields in exactly the order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        order_id = in.readInt();
        dateString = in.readUTF();
        p_id = in.readUTF();
        amount = in.readInt();
        pname = in.readUTF();
        category_id = in.readInt();
        price = in.readFloat();
        flag = in.readUTF();
    }

    // flag intentionally omitted: this string is the job's final output line,
    // so its format must stay stable.
    @Override
    public String toString() {
        return "order_id=" + order_id +
                ", dateString='" + dateString + '\'' +
                ", p_id='" + p_id + '\'' +
                ", amount=" + amount +
                ", pname='" + pname + '\'' +
                ", category_id=" + category_id +
                ", price=" + price;
    }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
- 143
- 144
- 145
- 146
- 147
- 148
- 149
- 150
- 151
- 152
- 153
2、join功能的实现:
Rjoin.java
package join;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
/**
* Created by tianjun on 2017/3/17.
*/
/**
 * Reduce-side join of an order file and a product file on product id.
 * <p>
 * The mapper tags every record with the source table (by input file name)
 * and emits it keyed on the product id; the reducer then receives all
 * records for one product id together and stitches product attributes onto
 * each order record.
 */
public class Rjoin {

    static class RjoinMapper extends Mapper<LongWritable, Text, Text, InfoBean1> {
        // Reused across map() calls to avoid per-record allocation; safe
        // because context.write serializes the bean immediately.
        InfoBean1 infoBean = new InfoBean1();
        Text k = new Text();

        /**
         * Parses one CSV line and emits (pid, bean). The source table is
         * detected from the input file name: files starting with "order"
         * are order records, everything else is a product record.
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            // Skip blank lines — they would otherwise blow up in
            // split()/parseInt with ArrayIndexOutOfBounds/NumberFormatException.
            if (line.trim().isEmpty()) {
                return;
            }
            FileSplit inputSplit = (FileSplit) context.getInputSplit();
            String name = inputSplit.getPath().getName();
            String[] fields = line.split(",");
            String pid;
            if (name.startsWith("order")) {
                // order: id,date,pid,amount — product side left as placeholders
                pid = fields[2];
                infoBean.set(Integer.parseInt(fields[0]), fields[1], pid, Integer.parseInt(fields[3]), "", 0, 0, "0");
            } else {
                // product: id,pname,category_id,price — order side left as placeholders
                pid = fields[0];
                infoBean.set(0, "", pid, 0, fields[1], Integer.parseInt(fields[2]), Float.parseFloat(fields[3]), "1");
            }
            k.set(pid);
            context.write(k, infoBean);
        }
    }

    static class RjoinReducer extends Reducer<Text, InfoBean1, InfoBean1, NullWritable> {

        /**
         * For one product id: copy the single product record aside, collect
         * all order records, then enrich each order with the product's
         * name/category/price and emit it.
         */
        @Override
        protected void reduce(Text key, Iterable<InfoBean1> values, Context context) throws IOException, InterruptedException {
            InfoBean1 pdBean = new InfoBean1();
            ArrayList<InfoBean1> orderBeans = new ArrayList<>();
            for (InfoBean1 bean : values) {
                // values are backed by one reused object, so each record must
                // be copied out before advancing the iterator.
                if ("1".equals(bean.getFlag())) {
                    try {
                        BeanUtils.copyProperties(pdBean, bean);
                    } catch (Exception e) {
                        // Swallowing this would silently drop the product side
                        // and corrupt the join output — fail the task instead.
                        throw new IOException("Failed to copy product record for key " + key, e);
                    }
                } else {
                    InfoBean1 odBean = new InfoBean1();
                    try {
                        BeanUtils.copyProperties(odBean, bean);
                    } catch (Exception e) {
                        throw new IOException("Failed to copy order record for key " + key, e);
                    }
                    orderBeans.add(odBean);
                }
            }
            // Attach the product attributes to every order and emit.
            for (InfoBean1 bean : orderBeans) {
                bean.setPname(pdBean.getPname());
                bean.setCategory_id(pdBean.getCategory_id());
                bean.setPrice(pdBean.getPrice());
                context.write(bean, NullWritable.get());
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        // When submitting from a Windows dev box, impersonate root on the cluster.
        String os = System.getProperty("os.name").toLowerCase();
        if (os.contains("windows")) {
            System.setProperty("HADOOP_USER_NAME", "root");
        }
        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.hostname", "mini01");
        conf.set("fs.defaultFS", "hdfs://mini01:9000/");
        // 默认就是local模式
        // conf.set("mapreduce.framework.name","local");
        // conf.set("mapreduce.jobtracker.address","local");
        // conf.set("fs.defaultFS","file:///");
        Job wcjob = Job.getInstance(conf);
        wcjob.setJar("F:/myWorkPlace/java/dubbo/demo/dubbo-demo/mr-demo1/target/mr.demo-1.0-SNAPSHOT.jar");
        // setJarByClass only works when the jar is on the cluster classpath;
        // submitting from the IDE needs the explicit setJar above.
        // wcjob.setJarByClass(Rjoin.class);
        wcjob.setMapperClass(RjoinMapper.class);
        wcjob.setReducerClass(RjoinReducer.class);
        // Map output types
        wcjob.setMapOutputKeyClass(Text.class);
        wcjob.setMapOutputValueClass(InfoBean1.class);
        // Reduce output types
        wcjob.setOutputKeyClass(InfoBean1.class);
        wcjob.setOutputValueClass(NullWritable.class);
        // Default InputFormat is TextInputFormat.
        // wcjob.setInputFormatClass(CombineFileInputFormat.class);
        // CombineFileInputFormat.setMaxInputSplitSize(wcjob,4194304);
        // CombineFileInputFormat.setMinInputSplitSize(wcjob,2097152);

        // Reuse the job's configured conf so fs.defaultFS etc. apply
        // (the original passed a fresh, unconfigured Configuration here).
        FileSystem fs = FileSystem.get(new URI("hdfs://mini01:9000"), conf, "root");
        Path outputPath = new Path("hdfs://mini01:9000/wc/rjoin");
        // The job fails if the output directory already exists — remove it.
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        // Input data location
        FileInputFormat.setInputPaths(wcjob, new Path("hdfs://mini01:9000/input/rjoin"));
        // Output location (same path object as the pre-delete check above)
        FileOutputFormat.setOutputPath(wcjob, outputPath);
        boolean res = wcjob.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
- 143
- 144
- 145
- 146
- 147
- 148
- 149
- 150
- 151
- 152
三、结果
处理结果如下:
order_id=1001, dateString='20150710', p_id='P0001', amount=2, pname='xiaomi5', category_id=1000, price=2.0
order_id=1002, dateString='20150710', p_id='P0001', amount=3, pname='xiaomi5', category_id=1000, price=2.0
order_id=1001, dateString='20150710', p_id='P0001', amount=2, pname='xiaomi5', category_id=1000, price=2.0
order_id=1003, dateString='20150710', p_id='P0002', amount=3, pname='chuiziT1', category_id=1000, price=3.0
order_id=1002, dateString='20150710', p_id='P0002', amount=3, pname='chuiziT1', category_id=1000, price=3.0
order_id=1002, dateString='20150710', p_id='P0003', amount=3, pname='meizu', category_id=1001, price=3.0