MapReduce执行过程详解图
编程实现MapReduce执行过程
现在在Map端实现两张表的关联,将两张表的部分数据合并成一张表。
现有部门信息文件DEP.txt和员工信息文件EMP.txt,其内容如下:
DEP.txt: # EMP.txt:
#
1,Sales # zhang,male,20,1
2,Dev # li,female,25,2
3,Mgt3 # wang,female,30,3
# zhou,male,35,2
DEP.txt中,第一列是部门编号,第二列是部门名称;
EMP.txt中,各列按顺序分别是员工姓名、性别、年龄、部门编号;
现要求按部门编号将两张表关联,合并为一张表。
1、YuanGong.java
/**
 * Joined employee/department record that is passed between MapReduce stages.
 *
 * <p>All fields are strings and are serialised with {@code writeUTF}/{@code readUTF}.
 * {@link #write} and {@link #readFields} handle the same seven fields in the same
 * order; a {@code null} field is written as the empty string, so it reads back as
 * {@code ""} rather than {@code null}.
 */
public class YuanGong implements Writable {
    private String EMPname;
    private String EMPgender;
    private String EMPage;
    // NOTE(review): COJoinMapper in this file stores the department number here,
    // and toString() prints it under the "部门id" label.
    private String EMPid;
    private String DEPid;
    private String DEPname;
    // Table marker. The original note said "0 = custom table, 1 = order table",
    // which looks copied from a different example; nothing in this file sets it.
    private String tableFlag;

    /**
     * Serialises every field, in a fixed order, as a UTF string.
     *
     * @param out sink to write to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // writeUTF(null) throws NullPointerException, so every field is guarded,
        // not just DEPid and tableFlag.
        out.writeUTF(nullToEmpty(DEPid));
        out.writeUTF(nullToEmpty(DEPname));
        out.writeUTF(nullToEmpty(EMPid));
        out.writeUTF(nullToEmpty(EMPname));
        out.writeUTF(nullToEmpty(EMPgender));
        out.writeUTF(nullToEmpty(EMPage));
        out.writeUTF(nullToEmpty(tableFlag));
    }

    /**
     * Restores every field in exactly the order {@link #write} emitted them.
     *
     * @param in source to read from
     * @throws IOException if the underlying stream fails or ends early
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.DEPid = in.readUTF();
        this.DEPname = in.readUTF();
        this.EMPid = in.readUTF();
        this.EMPname = in.readUTF();
        this.EMPgender = in.readUTF();
        this.EMPage = in.readUTF();
        // Previously omitted: write() emits tableFlag, so it must be consumed here,
        // otherwise the value is lost and a reused instance keeps a stale flag.
        this.tableFlag = in.readUTF();
    }

    /** Returns the single output line for this record. */
    @Override
    public String toString() {
        return "部门id:" + EMPid + ",姓名:" + EMPname + ",性别:" + EMPgender + ",年龄:" + EMPage + ",部门名称:" + DEPname;
    }

    /** Maps {@code null} to the empty string so the value is safe for {@code writeUTF}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    public String getEMPname() {
        return EMPname;
    }

    public void setEMPname(String EMPname) {
        this.EMPname = EMPname;
    }

    public String getEMPgender() {
        return EMPgender;
    }

    public void setEMPgender(String EMPgender) {
        this.EMPgender = EMPgender;
    }

    public String getEMPage() {
        return EMPage;
    }

    public void setEMPage(String EMPage) {
        this.EMPage = EMPage;
    }

    public String getEMPid() {
        return EMPid;
    }

    public void setEMPid(String EMPid) {
        this.EMPid = EMPid;
    }

    public String getDEPid() {
        return DEPid;
    }

    public void setDEPid(String DEPid) {
        this.DEPid = DEPid;
    }

    public String getDEPname() {
        return DEPname;
    }

    public void setDEPname(String DEPname) {
        this.DEPname = DEPname;
    }

    public String getTableFlag() {
        return tableFlag;
    }

    public void setTableFlag(String tableFlag) {
        this.tableFlag = tableFlag;
    }
}
2、COJoinMapper.java
/**
 * Map-side join: loads the department table into memory once per task, then
 * attaches the department name to every employee row.
 *
 * <p>Input rows are {@code name,gender,age,departmentId}. The output key is the
 * department id and the output value is the joined {@link YuanGong} record.
 */
public class COJoinMapper extends Mapper<LongWritable, Text, Text, YuanGong> {
    /** Department id to department name, filled in {@link #setup}. */
    Map<String, String> map = new HashMap<>();

    /**
     * Reads the first cached file as {@code departmentId,departmentName} rows.
     *
     * @param context task context that exposes the cached URIs
     * @throws IOException if the cached file cannot be opened or read
     */
    @Override
    protected void setup(Context context) throws IOException {
        // NOTE(review): COJoinDriver registers DEP.txt with addCacheArchive, so it is
        // looked up through getCacheArchives here; the two calls must stay paired.
        URI[] cacheFiles = context.getCacheArchives();
        if (cacheFiles == null || cacheFiles.length == 0) {
            return;
        }
        String filePath = cacheFiles[0].getPath();
        // try-with-resources closes the reader even when a read fails.
        try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split(",");
                if (columns.length < 2) {
                    // Blank or malformed row: skip it instead of stopping the load
                    // or failing on a missing column.
                    continue;
                }
                map.put(columns[0], columns[1]);
            }
        }
    }

    /**
     * Joins one employee row with its department and emits it keyed by department id.
     * Rows with fewer than four columns are skipped.
     *
     * @param key     byte offset of the row in the input split (unused)
     * @param value   one employee row
     * @param context sink for the joined record
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] columns = value.toString().split(",");
        if (columns.length < 4) {
            return;
        }
        String deptId = columns[3];
        String deptName = map.get(deptId);

        YuanGong co = new YuanGong();
        // YuanGong.toString() prints EMPid under the "部门id" label, so the
        // department number is kept there as well to leave the output unchanged.
        co.setEMPid(deptId);
        co.setEMPname(columns[0]);
        co.setEMPgender(columns[1]);
        co.setEMPage(columns[2]);
        // Previously this received the department NAME (map.get(EMPid)).
        co.setDEPid(deptId);
        // An unknown department yields an empty name; a null would make
        // YuanGong.write fail inside writeUTF.
        co.setDEPname(deptName == null ? "" : deptName);
        context.write(new Text(deptId), co);
    }
}
3、COJoinDriver.java
/**
 * Entry point that configures and submits the map-side join job.
 *
 * <p>The employee file is the job input, the department file is registered in the
 * distributed cache for {@code COJoinMapper}, and the process exits with 0 on
 * success or 1 on failure.
 */
public class COJoinDriver {
    public static void main(String[] args) throws Exception {
        // Locations used by the job: employee input, result directory, cached department table.
        final String employeeInput = "file:///E:/test/EMP.txt";
        final String resultDir = "file:///E:/test/A";
        final String departmentCache = "file:///E:/test/DEP.txt";

        final Configuration conf = new Configuration();
        final Job joinJob = Job.getInstance(conf, "mapJoinJob");

        joinJob.setJarByClass(COJoinDriver.class);
        joinJob.setMapperClass(COJoinMapper.class);
        joinJob.setOutputKeyClass(Text.class);
        joinJob.setOutputValueClass(YuanGong.class);

        // COJoinMapper.setup reads this entry back through getCacheArchives().
        joinJob.addCacheArchive(new URI(departmentCache));

        FileInputFormat.setInputPaths(joinJob, new Path(employeeInput));
        FileOutputFormat.setOutputPath(joinJob, new Path(resultDir));

        // Block until the job finishes, report the outcome, and mirror it in the exit code.
        final boolean succeeded = joinJob.waitForCompletion(true);
        final String message;
        final int exitCode;
        if (succeeded) {
            message = "执行成功";
            exitCode = 0;
        } else {
            message = "执行失败";
            exitCode = 1;
        }
        System.out.println(message);
        System.exit(exitCode);
    }
}
注意路径不要写反了!!!
运行成功后会在输出路径建立一个文件夹,里面结构如下:
打开part-r-00000,就可以查看到两张表合并得到的结果。