order.txt:
order011,u001
order012,u001
order033,u005
order034,u002
order055,u003
order066,u004
user.txt:
u001,senge,18,male,angelababy
u002,ss,58,male,ruhua
u003,shuaishuai,16,female,chunge
u004,laoyang,28,female,zengge
u005,nana,24,female,huangbo
u006,dingding,19,male,taojiji
实现的结果:
JoinBean{uid='u001', uname='senge', age=18, gender='male', friend='angelababy', oid='order012' }
… …
定义一个Bean类,封装两个文件中所有的属性,实现Hadoop的序列化接口
public class JoinBean implements Writable {
private String uid;
private String uname;
private int age;
private String gender;
private String friend;
private String oid;
private String table;
public void set(String uid, String uname, int age, String gender, String friend, String oid, String table) {
this.uid = uid;
this.uname = uname;
this.age = age;
this.gender = gender;
this.friend = friend;
this.oid = oid;
this.table = table;
}
public String getUid() {
return uid;
}
public void setUid(String uid) {
this.uid = uid;
}
public String getUname() {
return uname;
}
public void setUname(String uname) {
this.uname = uname;
}
public int getAge() {
return age;
}
public void setAge(int age) {
this.age = age;
}
public String getGender() {
return gender;
}
public void setGender(String gender) {
this.gender = gender;
}
public String getFriend() {
return friend;
}
public void setFriend(String friend) {
this.friend = friend;
}
public String getOid() {
return oid;
}
public void setOid(String oid) {
this.oid = oid;
}
public String getTable() {
return table;
}
public void setTable(String table) {
this.table = table;
}
@Override
public String toString() {
return "JoinBean{" +
"uid='" + uid + '\'' +
", uname='" + uname + '\'' +
", age=" + age +
", gender='" + gender + '\'' +
", friend='" + friend + '\'' +
", oid='" + oid + '\'' +
" }";
}
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(uid);
dataOutput.writeUTF(uname);
dataOutput.writeInt(age);
dataOutput.writeUTF(gender);
dataOutput.writeUTF(friend);
dataOutput.writeUTF(oid);
dataOutput.writeUTF(table);
}
public void readFields(DataInput dataInput) throws IOException {
uid = dataInput.readUTF();
uname = dataInput.readUTF();
age = dataInput.readInt();
gender = dataInput.readUTF();
friend = dataInput.readUTF();
oid = dataInput.readUTF();
table = dataInput.readUTF();
}
}
MapReduce
/**
 * Reduce-side join of order records with user records on the user id.
 *
 * <p>The mapper tags each input line with the file it came from and keys it by uid;
 * the reducer then combines, per uid, the single user record with each of that
 * user's orders and emits one joined {@link JoinBean} per order.
 */
public class Order {

    /**
     * Parses one line of either input file into a {@link JoinBean} keyed by uid.
     * The input file is recognised by its name: files whose name starts with
     * {@code "order"} are order data, everything else is treated as user data.
     */
    static class OrderMapper extends Mapper<LongWritable, Text, Text, JoinBean> {
        String fileName = null;
        // Reused across map() calls; context.write() serializes the current contents.
        JoinBean joinBean = new JoinBean();
        Text k = new Text();

        /**
         * Records the name of the file this split belongs to, so that map() can
         * tell order lines from user lines.
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            // Name of the file being read.
            fileName = fileSplit.getPath().getName();
        }

        /**
         * Emits {@code uid -> bean} for one input line.
         *
         * @param key     byte offset of the line (unused)
         * @param value   the raw comma-separated line
         * @param context sink for the keyed bean
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            if (line.trim().isEmpty()) {
                // A blank line carries no record; skipping it avoids an index error below.
                return;
            }
            String[] split = line.split(",");
            if (fileName.startsWith("order")) {
                // Order line, e.g. "order002,u006".
                String oid = split[0];
                String uid = split[1];
                // The user-side fields are unknown here; placeholder values are used
                // so that every field of the bean holds a non-null value.
                joinBean.set(uid, "-1", -1, "-1", "-1", oid, "order");
            } else {
                // User line, e.g. "u001,senge,18,male,angelababy".
                String uid = split[0];
                String uname = split[1];
                int age = Integer.parseInt(split[2]);
                String gender = split[3];
                String friend = split[4];
                joinBean.set(uid, uname, age, gender, friend, "1", "user");
            }
            k.set(joinBean.getUid());
            context.write(k, joinBean);
        }
    }

    /**
     * Joins, for one uid, the user record with every order of that user.
     */
    static class OrderReducer extends Reducer<Text, JoinBean, JoinBean, NullWritable> {

        /**
         * Writes one joined bean per order of the given uid. Orders whose uid has
         * no user record are not emitted (inner join); they are counted in the
         * job counter {@code join / ordersWithoutUser} instead.
         *
         * @param key     the uid
         * @param values  all beans (user and order) mapped to this uid
         * @param context sink for the joined beans
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<JoinBean> values, Context context) throws IOException, InterruptedException {
            // The value object handed out by the iterator is the same instance on
            // every step (see the original author's note), so the data needed later
            // is copied out of it rather than keeping references to it.
            List<String> orderIds = new ArrayList<String>();
            JoinBean userBean = null;
            for (JoinBean value : values) {
                if ("order".equals(value.getTable())) {
                    // Only the order id is order-specific; the rest are placeholders.
                    orderIds.add(value.getOid());
                } else {
                    userBean = new JoinBean();
                    userBean.set(value.getUid(), value.getUname(), value.getAge(),
                            value.getGender(), value.getFriend(), value.getOid(), value.getTable());
                }
            }

            if (orderIds.isEmpty()) {
                // User without orders: nothing to emit.
                return;
            }
            if (userBean == null) {
                // Orders without a matching user would otherwise be written with
                // null/0 user columns; drop them but keep them visible in a counter.
                context.getCounter("join", "ordersWithoutUser").increment(orderIds.size());
                return;
            }

            // Attach the user columns to each order and emit.
            JoinBean joined = new JoinBean();
            for (String oid : orderIds) {
                joined.set(userBean.getUid(), userBean.getUname(), userBean.getAge(),
                        userBean.getGender(), userBean.getFriend(), oid, "a");
                context.write(joined, NullWritable.get());
            }
        }
    }

    /**
     * Configures and runs the join job.
     *
     * @param args optional: {@code args[0]} input directory and {@code args[1]} output
     *             directory; when fewer than two arguments are given the built-in
     *             default paths are used
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        boolean pathsGiven = args != null && args.length >= 2;
        String inputPath = pathsGiven ? args[0] : "D:\\MR\\join\\input";
        String outputPath = pathsGiven ? args[1] : "D:\\MR\\join\\output";

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "join");
        // Lets Hadoop locate the jar containing these classes when run on a cluster.
        job.setJarByClass(Order.class);
        // Map and reduce logic.
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);
        // Output types of the map phase and of the job.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(JoinBean.class);
        job.setOutputKeyClass(JoinBean.class);
        job.setOutputValueClass(NullWritable.class);
        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // Report a failed job through the process exit status.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}