将人员的地址ID完善成为地址名称。输出格式:人员ID 姓名 地址(字段之间以空格分隔)
测试数据(字段之间须以制表符 \t 分隔;以 # 开头的行为表头,会被 mapper 跳过):
address.txt
#地址ID 地址名称
1 北京
2 上海
3 广州
employee.txt
#人员ID 人员名称 地址ID
1 张三 1
2 李四 2
3 王五 1
4 赵六 3
5 马七 3
User实体类
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Join record shared by both inputs of the reduce-side join.
 *
 * <p>A single instance carries either a city record ({@code flag == 0}, only
 * {@code cityNo}/{@code cityName} populated) or an employee record
 * ({@code flag == 1}, {@code userNo}/{@code userName}/{@code cityNo} populated).
 *
 * <p>All string fields are kept non-null (null is normalized to the empty string),
 * because {@link DataOutput#writeUTF(String)} throws {@link NullPointerException}
 * when given null.
 */
public class User implements WritableComparable<User> {
    private String userNo = "";
    private String userName = "";
    private String cityNo = "";
    private String cityName = "";
    private int flag = 0; // 0: city record, 1: employee record

    /** Creates an empty record; required so the instance can be filled by {@link #readFields}. */
    public User() {
    }

    /**
     * Copy constructor.
     *
     * @param user the record whose field values are copied
     */
    public User(User user) {
        this(user.getUserNo(), user.getUserName(), user.getCityNo(), user.getCityName(), user.getFlag());
    }

    /**
     * Creates a fully specified record.
     *
     * @param userNo   employee id (null is stored as "")
     * @param userName employee name (null is stored as "")
     * @param cityNo   city id (null is stored as "")
     * @param cityName city name (null is stored as "")
     * @param flag     0 for a city record, 1 for an employee record
     */
    public User(String userNo, String userName, String cityNo, String cityName, int flag) {
        this.userNo = nullToEmpty(userNo);
        this.userName = nullToEmpty(userName);
        this.cityNo = nullToEmpty(cityNo);
        this.cityName = nullToEmpty(cityName);
        this.flag = flag;
    }

    /** Returns {@code value}, or the empty string when {@code value} is null. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Formats the record as "userNo userName cityName", separated by single spaces. */
    @Override
    public String toString() {
        return this.userNo + " " + this.userName + " " + this.cityName;
    }

    /**
     * Orders records by flag (city records before employee records), then by
     * cityNo, userNo, userName and cityName, so the ordering is consistent with
     * {@link #equals(Object)}.
     */
    @Override
    public int compareTo(User o) {
        int result = Integer.compare(this.flag, o.flag);
        if (result == 0) {
            result = this.cityNo.compareTo(o.cityNo);
        }
        if (result == 0) {
            result = this.userNo.compareTo(o.userNo);
        }
        if (result == 0) {
            result = this.userName.compareTo(o.userName);
        }
        if (result == 0) {
            result = this.cityName.compareTo(o.cityName);
        }
        return result;
    }

    /** Two records are equal when all five fields are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof User)) {
            return false;
        }
        User other = (User) obj;
        return this.flag == other.flag
                && this.userNo.equals(other.userNo)
                && this.userName.equals(other.userName)
                && this.cityNo.equals(other.cityNo)
                && this.cityName.equals(other.cityName);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hash(userNo, userName, cityNo, cityName, flag);
    }

    /** Serializes the four strings followed by the flag; {@link #readFields} reads the same order. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.userNo);
        dataOutput.writeUTF(this.userName);
        dataOutput.writeUTF(this.cityNo);
        dataOutput.writeUTF(this.cityName);
        dataOutput.writeInt(this.flag);
    }

    /** Restores the fields in exactly the order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userNo = dataInput.readUTF();
        this.userName = dataInput.readUTF();
        this.cityNo = dataInput.readUTF();
        this.cityName = dataInput.readUTF();
        this.flag = dataInput.readInt();
    }

    public String getUserNo() {
        return userNo;
    }

    public void setUserNo(String userNo) {
        this.userNo = nullToEmpty(userNo);
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = nullToEmpty(userName);
    }

    public String getCityNo() {
        return cityNo;
    }

    public void setCityNo(String cityNo) {
        this.cityNo = nullToEmpty(cityNo);
    }

    public String getCityName() {
        return cityName;
    }

    public void setCityName(String cityName) {
        this.cityName = nullToEmpty(cityName);
    }

    public int getFlag() {
        return flag;
    }

    public void setFlag(int flag) {
        this.flag = flag;
    }
}
mapper
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Tags each input line as a city or an employee record and emits it keyed by city id,
 * so that both sides of the join meet in the same reduce call.
 *
 * <p>Input lines are tab-separated. Lines starting with {@code #} and blank lines are
 * skipped. A line with exactly two fields is treated as a city record
 * ({@code cityNo, cityName}); a line with three or more fields is treated as an
 * employee record ({@code userNo, userName, cityNo}). Lines with fewer than two
 * fields are counted as malformed and skipped instead of failing the task with an
 * {@link ArrayIndexOutOfBoundsException}.
 */
public class JoinTwoMapper extends Mapper<LongWritable, Text, Text, User> {
    private static final String FIELD_SEPARATOR = "\t";
    private static final String COUNTER_GROUP = "JoinTwoMapper";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /**
     * Parses one input line and emits at most one (cityNo, User) pair.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   one line of address or employee data
     * @param context task context used to emit output and update counters
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        // Blank lines used to fall into the employee branch and crash on arr[1].
        if (line.isEmpty() || line.startsWith("#")) {
            return;
        }

        String[] arr = line.split(FIELD_SEPARATOR);
        if (arr.length == 2) { // city record
            User user = new User();
            user.setCityNo(arr[0]);
            user.setCityName(arr[1]);
            user.setFlag(0);
            context.write(new Text(arr[0]), user);
        } else if (arr.length >= 3) { // employee record
            User user = new User();
            user.setUserNo(arr[0]);
            user.setUserName(arr[1]);
            user.setCityNo(arr[2]);
            user.setFlag(1);
            context.write(new Text(arr[2]), user);
        } else {
            // Fewer than two fields: neither record type can be built from this line.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
        }
    }
}
reducer
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Joins the city record and the employee records that share one city id and writes
 * one output line per employee in the form produced by {@code User.toString()}.
 *
 * <p>If no city record arrived for the key, the employees are still written, with
 * the city name they already carry (empty unless set upstream), and a counter is
 * incremented. Previously this case threw a {@link NullPointerException}.
 */
public class JoinTwoReducer extends Reducer<Text, User, NullWritable, Text> {
    private static final String COUNTER_GROUP = "JoinTwoReducer";
    private static final String MISSING_CITY_COUNTER = "EMPLOYEES_WITHOUT_CITY";

    /**
     * Performs the join for one city id.
     *
     * @param key     the city id shared by all values
     * @param values  city and employee records for this city id, in no particular order
     * @param context task context used to emit output and update counters
     */
    @Override
    protected void reduce(Text key, Iterable<User> values, Context context) throws IOException, InterruptedException {
        User cityInfo = null;
        List<User> list = new ArrayList<User>();
        for (User user : values) {
            // Records are copied before being kept: the framework may reuse the
            // value object between iterations (see Hadoop Reducer documentation).
            if (user.getFlag() == 0) { // city record
                cityInfo = new User(user);
            } else if (user.getFlag() == 1) { // employee record
                list.add(new User(user));
            }
        }

        // Emit every employee, filling in the city name when one is known.
        for (User user : list) {
            if (cityInfo != null) {
                user.setCityName(cityInfo.getCityName());
            } else {
                context.getCounter(COUNTER_GROUP, MISSING_CITY_COUNTER).increment(1);
            }
            context.write(NullWritable.get(), new Text(user.toString()));
        }
    }
}
job
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the join job that replaces each employee's address id with the address
 * name. Each output line is the text produced by {@code User.toString()}.
 */
public class JobMain {
    /**
     * Configures and runs the job, then exits with 0 on success and 1 on failure.
     *
     * <p>An existing output directory is deleted recursively before the job starts.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length != 2) {
            System.err.println("Usage: Join<input path> <output path>");
            System.exit(-1);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Join job2");
        job.setJarByClass(JobMain.class);

        job.setMapperClass(JoinTwoMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(User.class);

        job.setReducerClass(JoinTwoReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        Path outDirPath = new Path(args[1]);
        // Resolve the filesystem from the path itself, not from the default
        // filesystem, so an output URI with another scheme (e.g. file:// while
        // fs.defaultFS is hdfs://) does not fail with "Wrong FS".
        FileSystem fs = outDirPath.getFileSystem(conf);
        if (fs.exists(outDirPath)) {
            fs.delete(outDirPath, true);
        }
        FileOutputFormat.setOutputPath(job, outDirPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
结果(同一地址下各人员的输出顺序不保证固定):
3 王五 北京
1 张三 北京
2 李四 上海
5 马七 广州
4 赵六 广州