1.Bean
package com.oracle.join;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
public class InfoBean implements Writable {
    private int order_id;      // order id (order file)
    private String dateString; // order date (order file)
    private String p_id;       // product id: the join key shared by both files
    private int amout;         // quantity ordered (order file)
    private String pname;      // product name (product file)
    private int category_id;   // product category (product file)
    private float price;       // unit price (product file)
    private String flag;       // record tag: "0" = order record, "1" = product record
    // Deserialization: fields must be read in exactly the order write() emits them
    @Override
    public void readFields(DataInput in) throws IOException {
        this.order_id = in.readInt();
        this.dateString = in.readUTF();
        this.p_id = in.readUTF();
        this.amout = in.readInt();
        this.pname = in.readUTF();
        this.category_id = in.readInt();
        this.price = in.readFloat();
        this.flag = in.readUTF();
    }
    // Serialization: the field order here defines the wire format
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(order_id);
        out.writeUTF(dateString);
        out.writeUTF(p_id);
        out.writeInt(amout);
        out.writeUTF(pname);
        out.writeInt(category_id);
        out.writeFloat(price);
        out.writeUTF(flag);
    }
    // Constructors, getters/setters, and toString() omitted; see the sketch below
}
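The rest of the code only compiles with the members elided above: the mappers call a full-argument constructor, Hadoop needs a public no-arg constructor to instantiate the Writable before calling readFields(), the reducer relies on getters/setters (BeanUtils.copyProperties requires both for every property it copies), and the final output goes through toString(). A minimal sketch of those members; the toString() column layout is an assumption:

public InfoBean() {
    // Required by Hadoop: Writables are created reflectively, then readFields() is called
}

public InfoBean(int order_id, String dateString, String p_id, int amout,
        String pname, int category_id, float price, String flag) {
    this.order_id = order_id;
    this.dateString = dateString;
    this.p_id = p_id;
    this.amout = amout;
    this.pname = pname;
    this.category_id = category_id;
    this.price = price;
    this.flag = flag;
}

public String getFlag() { return flag; }
public String getPname() { return pname; }
public void setPname(String pname) { this.pname = pname; }
public int getCategory_id() { return category_id; }
public void setCategory_id(int category_id) { this.category_id = category_id; }
public float getPrice() { return price; }
public void setPrice(float price) { this.price = price; }
// ...getters/setters for order_id, dateString, p_id, and amout follow the same pattern

@Override
public String toString() {
    // Column order is an assumption; adjust to the desired output layout
    return order_id + "\t" + dateString + "\t" + amout + "\t" + pname
            + "\t" + category_id + "\t" + price;
}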
2.Map
Map1:
package com.oracle.join;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class JoinMap1 extends Mapper<LongWritable, Text, Text, InfoBean> {
    private Text keys;
    private InfoBean values;
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Order line: order_id date p_id amount, space-separated
        String[] line = value.toString().split(" ");
        // Join key is the product id; flag "0" tags this as an order record,
        // and the product-side fields are filled with placeholders
        keys = new Text(line[2]);
        values = new InfoBean(Integer.parseInt(line[0]), line[1], line[2],
                Integer.parseInt(line[3]), "", 0, 0f, "0");
        context.write(keys, values);
    }
}
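Given the indices used above, an order line is expected to carry four space-separated fields; a sample line (values are hypothetical):

1001 2017-08-04 p0001 2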
Map2:
package com.oracle.join;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class JoinMap2 extends Mapper<LongWritable, Text, Text, InfoBean> {
    private Text keys;
    private InfoBean values;
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Product line: p_id pname category_id price, space-separated
        String[] line = value.toString().split(" ");
        // Same join key (product id); flag "1" tags this as a product record,
        // and the order-side fields are filled with placeholders
        keys = new Text(line[0]);
        values = new InfoBean(0, "", line[0], 0, line[1],
                Integer.parseInt(line[2]), Float.parseFloat(line[3]), "1");
        context.write(keys, values);
    }
}
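And a matching product line (again with hypothetical values):

p0001 xiaomi 1000 1999.9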
3.Reduce
package com.oracle.join;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class JoinReduce extends Reducer<Text, InfoBean, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<InfoBean> values, Context context)
            throws IOException, InterruptedException {
        InfoBean infoProduct = new InfoBean();
        List<InfoBean> list = new ArrayList<InfoBean>();
        // Hadoop reuses the same InfoBean instance while iterating values,
        // so every record we want to keep must be copied into a fresh object
        for (InfoBean value : values) {
            if (value.getFlag().equals("1")) {
                // Product record: at most one per key
                try {
                    BeanUtils.copyProperties(infoProduct, value);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            } else {
                // Order record: collect all of them for this product id
                try {
                    InfoBean infoOrder = new InfoBean();
                    BeanUtils.copyProperties(infoOrder, value);
                    list.add(infoOrder);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        // Stamp the product fields onto every order and emit the joined rows
        for (InfoBean val : list) {
            val.setPname(infoProduct.getPname());
            val.setCategory_id(infoProduct.getCategory_id());
            val.setPrice(infoProduct.getPrice());
            context.write(key, new Text(val.toString()));
        }
    }
}
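The copy step is what makes this reducer correct: because Hadoop hands back the same InfoBean instance on each iteration, storing the reference directly would fill the list with copies of the last record. If you would rather not depend on commons-beanutils, Hadoop's own WritableUtils.clone performs the same deep copy by round-tripping the Writable through its own serialization; a sketch of the loop rewritten that way:

import org.apache.hadoop.io.WritableUtils;

for (InfoBean value : values) {
    if (value.getFlag().equals("1")) {
        // clone() serializes and deserializes the bean, yielding an independent copy
        infoProduct = WritableUtils.clone(value, context.getConfiguration());
    } else {
        list.add(WritableUtils.clone(value, context.getConfiguration()));
    }
}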
4.main
package com.oracle.join;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class JoinMain implements Tool {
    private Configuration conf;
    @Override
    public Configuration getConf() {
        // Lazily create the Configuration instead of building a new one on every call
        if (conf == null) {
            conf = new Configuration();
        }
        return conf;
    }
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }
    @Override
    public int run(String[] args) throws Exception {
        // Use the Configuration held by this Tool rather than a fresh instance
        Job job = Job.getInstance(getConf());
        job.setJarByClass(JoinMain.class);
        job.setJobName("JoinMain");
        // No single Mapper class is set here; MultipleInputs binds one mapper per input path
        /*job.setMapperClass(JoinMap.class);*/
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, JoinMap1.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, JoinMap2.class);
        job.setReducerClass(JoinReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(InfoBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        /*FileInputFormat.addInputPath(job, new Path(args[0]));*/
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        return job.waitForCompletion(true) ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        int rel = ToolRunner.run(new JoinMain(), args);
        System.exit(rel);
    }
}
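A common simplification of this driver is to extend Configured, which supplies getConf()/setConf() and lets ToolRunner inject generic options (-D key=value, -files, ...) without any hand-written plumbing; a sketch, with the job setup unchanged:

package com.oracle.join;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class JoinMain extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // getConf() is inherited from Configured
        Job job = Job.getInstance(getConf());
        // ...same job setup as above...
        return job.waitForCompletion(true) ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new JoinMain(), args));
    }
}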
5.Run command
hadoop jar *.jar /data/maps/input1 /data/maps/input2 /data/maps/output
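The three arguments map to args[0] (the order input), args[1] (the product input), and args[2] (the output directory, which must not already exist or the job fails at startup). If the jar's manifest does not declare a Main-Class, pass the driver class explicitly (the jar name here is hypothetical):

hadoop jar join.jar com.oracle.join.JoinMain /data/maps/input1 /data/maps/input2 /data/maps/output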