Hadoop MapReduce Development: Optimized Join of Two Input Datasets (with Sorting Added)

Join the two tables, then sort the output by a chosen field (here, the city ID). The approach is a reduce-side join with a composite key: UserKey carries the city ID plus an isPrimary flag, so records are sorted by city ID, the address record sorts ahead of the user records for the same city, and a grouping comparator makes all records for one city arrive in a single reduce call.

Test data:

address.txt

#address ID    address name
1    Beijing
2    Shanghai
3    Guangzhou

employee.txt

#user ID    user name    address ID
1    Zhang San    1
2    Li Si    2
3    Wang Wu    1
4    Zhao Liu    3
5    Ma Qi    3

First, the bean class User:

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class User implements WritableComparable<User> {
    private String userNo = "";
    private String userName = "";
    private String cityNo = "";
    private String cityName = "";

    public User() {
    }

    public User(User user) {
        this.userNo = user.getUserNo();
        this.userName = user.getUserName();
        this.cityNo = user.getCityNo();
        this.cityName = user.getCityName();
    }

    public User(String userNo, String userName, String cityNo, String cityName) {
        this.userNo = userNo;
        this.userName = userName;
        this.cityNo = cityNo;
        this.cityName = cityName;
    }

    @Override
    public String toString() {
        return  this.userNo + "    " + this.userName + "    " + this.cityName;
    }

    @Override
    public int compareTo(User o) {
        //User is only used as a map output value, never as a key, so the
        //framework does not call this method; returning 0 is sufficient
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.userNo);
        dataOutput.writeUTF(this.userName);
        dataOutput.writeUTF(this.cityNo);
        dataOutput.writeUTF(this.cityName);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userNo = dataInput.readUTF();
        this.userName = dataInput.readUTF();
        this.cityNo = dataInput.readUTF();
        this.cityName = dataInput.readUTF();
    }

    public String getUserNo() {
        return userNo;
    }

    public void setUserNo(String userNo) {
        this.userNo = userNo;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    public String getCityNo() {
        return cityNo;
    }

    public void setCityNo(String cityNo) {
        this.cityNo = cityNo;
    }

    public String getCityName() {
        return cityName;
    }

    public void setCityName(String cityName) {
        this.cityName = cityName;
    }

}
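
A quick way to sanity-check the Writable plumbing is a serialization round trip through write() and readFields() (a standalone sketch, not part of the original post; the sample values are taken from the test data):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class UserSerDemo {
    public static void main(String[] args) throws IOException {
        User in = new User("1", "Zhang San", "1", "Beijing");

        //serialize with write(), then rebuild a fresh instance with readFields()
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bos));

        User out = new User();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println(out);//prints: 1    Zhang San    Beijing
    }
}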

UserKey:

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class UserKey implements WritableComparable<UserKey> {
    private int cityNo;
    private boolean isPrimary;//true: city (address) record, false: user record

    public UserKey() {
    }

    public UserKey(int cityNo, boolean isPrimary) {
        this.cityNo = cityNo;
        this.isPrimary = isPrimary;
    }

    @Override
    public int hashCode() {//the default HashPartitioner uses the key's hashCode to decide which reducer receives the record
        return this.cityNo;
    }

    @Override
    public int compareTo(UserKey o) {
        //primary sort: cityNo ascending; within the same city the address
        //record (isPrimary == true) sorts ahead of the user records, so the
        //reducer always sees the city name first
        if(this.cityNo == o.getCityNo()) {
            if(this.isPrimary == o.isPrimary()) {
                return 0;
            } else {
                return this.isPrimary ? -1 : 1;
            }
        } else {
            return this.cityNo - o.getCityNo() > 0 ? 1 : -1;
        }
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(this.cityNo);
        dataOutput.writeBoolean(this.isPrimary);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.cityNo = dataInput.readInt();
        this.isPrimary = dataInput.readBoolean();
    }

    public int getCityNo() {
        return cityNo;
    }

    public void setCityNo(int cityNo) {
        this.cityNo = cityNo;
    }

    public boolean isPrimary() {
        return isPrimary;
    }

    public void setPrimary(boolean primary) {
        isPrimary = primary;
    }
}
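
Because hashCode() returns cityNo, the default HashPartitioner already routes every record for one city to the same reducer. Writing that rule out as an explicit Partitioner makes the routing visible (a sketch; CityPartitioner is not part of the original job and would need to be registered with job.setPartitionerClass(CityPartitioner.class)):

import org.apache.hadoop.mapreduce.Partitioner;

public class CityPartitioner extends Partitioner<UserKey, User> {
    @Override
    public int getPartition(UserKey key, User value, int numPartitions) {
        //route by cityNo only, ignoring isPrimary, so the address record
        //and its user records always land on the same reducer
        return (key.getCityNo() & Integer.MAX_VALUE) % numPartitions;
    }
}
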
GroupComparator:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class GroupComparator extends WritableComparator {
    public GroupComparator() {
        super(UserKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        //compare by cityNo only: keys that differ merely in isPrimary are
        //treated as equal, so an address record and its user records form
        //one group and share a single reduce() call
        UserKey a1 = (UserKey) a;
        UserKey b1 = (UserKey) b;
        if(a1.getCityNo() == b1.getCityNo()) {
            return 0;
        } else {
            return a1.getCityNo() > b1.getCityNo() ? 1 : -1;
        }
    }
}
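
To see how the sort order (UserKey.compareTo) and this grouping comparator interact, a small standalone check can sort a few keys and then compare two of them (a sketch using only the classes above):

import java.util.Arrays;

public class KeyOrderDemo {
    public static void main(String[] args) {
        UserKey[] keys = {
                new UserKey(2, false), new UserKey(1, false), new UserKey(1, true)
        };
        Arrays.sort(keys);//compareTo: cityNo ascending, city record first
        for (UserKey k : keys) {
            System.out.println(k.getCityNo() + " isPrimary=" + k.isPrimary());
        }
        //prints: 1 isPrimary=true, 1 isPrimary=false, 2 isPrimary=false

        //the grouping comparator treats (1,true) and (1,false) as equal,
        //so both records reach the reducer in a single reduce() call
        System.out.println(new GroupComparator().compare(keys[0], keys[1]));//0
    }
}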

Mapper:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class JoinThreeMapper extends Mapper<LongWritable, Text, UserKey, User> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        if(!line.startsWith("#")) {
            String[] arr = line.split("\t");
            if (arr.length == 2) {//address (city) record: ID, name
                User city = new User();
                city.setCityNo(arr[0]);
                city.setCityName(arr[1]);

                UserKey uKey = new UserKey();
                uKey.setCityNo(Integer.valueOf(arr[0]));
                uKey.setPrimary(true);

                context.write(uKey, city);
            } else if (arr.length == 3) {//user record: ID, name, address ID; also guards against blank or malformed lines
                User user = new User();
                user.setUserNo(arr[0]);
                user.setUserName(arr[1]);
                user.setCityNo(arr[2]);

                UserKey uKey = new UserKey();
                uKey.setCityNo(Integer.valueOf(arr[2]));
                uKey.setPrimary(false);

                context.write(uKey, user);
            }
        }
    }
}
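
Since context.write serializes the key and value immediately, the mapper may reuse one UserKey and one User instance instead of allocating new objects for every input line. This is a common MapReduce optimization, not part of the original post (a sketch; note that every field must be reset on each call, or stale values would leak between records):

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class JoinThreeMapperReuse extends Mapper<LongWritable, Text, UserKey, User> {
    private final UserKey outKey = new UserKey();
    private final User outValue = new User();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        if (line.startsWith("#")) {
            return;
        }
        String[] arr = line.split("\t");
        if (arr.length == 2) {//address record
            outValue.setUserNo("");//reset fields the address branch does not use
            outValue.setUserName("");
            outValue.setCityNo(arr[0]);
            outValue.setCityName(arr[1]);
            outKey.setCityNo(Integer.parseInt(arr[0]));
            outKey.setPrimary(true);
            context.write(outKey, outValue);
        } else if (arr.length == 3) {//user record
            outValue.setUserNo(arr[0]);
            outValue.setUserName(arr[1]);
            outValue.setCityNo(arr[2]);
            outValue.setCityName("");//reset the field the user branch does not use
            outKey.setCityNo(Integer.parseInt(arr[2]));
            outKey.setPrimary(false);
            context.write(outKey, outValue);
        }
    }
}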

Reducer:

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class JoinThreeReducer extends Reducer<UserKey, User, NullWritable, Text> {
    @Override
    protected void reduce(UserKey key, Iterable<User> values, Context context) throws IOException, InterruptedException {
        User city = null;
        int num = 0;
        for(User user : values) {
            if(num == 0) {
                //the first record of each group is the address record, since
                //isPrimary == true sorts first within a city; copy it, because
                //Hadoop reuses the same User instance across iterations
                city = new User(user);
                num++;
            } else {
                user.setCityName(city.getCityName());
                context.write(NullWritable.get(), new Text(user.toString()));
            }
        }
    }
    }
}
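
The reducer above assumes the first record of every group is the address record; if a user row referenced a cityNo with no matching address row, that user would silently be mistaken for the city. A more defensive variant (a sketch) can check the flag on the key instead of counting, relying on the Hadoop behavior that the key instance is refreshed in place as the values iterator advances:

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class JoinThreeReducerDefensive extends Reducer<UserKey, User, NullWritable, Text> {
    @Override
    protected void reduce(UserKey key, Iterable<User> values, Context context) throws IOException, InterruptedException {
        User city = null;
        for (User user : values) {
            if (key.isPrimary()) {//key reflects the record currently being visited
                city = new User(user);//copy: the framework reuses the value instance
            } else if (city != null) {//skip users with no matching address record
                user.setCityName(city.getCityName());
                context.write(NullWritable.get(), new Text(user.toString()));
            }
        }
    }
}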

Job driver:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Resolve each user's address ID into the address name. Output format: user ID, name, address.
 * Optimization over the previous version: the output is sorted by city.
 */
public class JobMain {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if(args.length != 2) {
            System.err.println("Usage: JoinData <input path> <output path>");
            System.exit(-1);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf,"Join job3");
        job.setJarByClass(JobMain.class);

        job.setMapperClass(JoinThreeMapper.class);
        job.setMapOutputKeyClass(UserKey.class);
        job.setMapOutputValueClass(User.class);

        job.setReducerClass(JoinThreeReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        //group records in the reduce phase by cityNo only, so a city and its users share one reduce call
        job.setGroupingComparatorClass(GroupComparator.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        Path outDirPath = new Path(args[1]);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outDirPath)) {
            fs.delete(outDirPath, true);
        }
        FileOutputFormat.setOutputPath(job, outDirPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
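
To run the job (the jar name and HDFS paths below are placeholders, not from the original post):

hadoop jar join-demo.jar JobMain /join/input /join/output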

Result (sorted by city ID: Beijing = 1, Shanghai = 2, Guangzhou = 3; the order of users within one city is not guaranteed, since their keys compare as equal):

3    Wang Wu    Beijing
1    Zhang San    Beijing
2    Li Si    Shanghai
5    Ma Qi    Guangzhou
4    Zhao Liu    Guangzhou
