多表联查
需求:根据 id 关联两个输入文件,输出每条记录对应的 factoryname 和 address
数据(实际文件中字段之间以制表符 \t 分隔,与代码中的 split("\t") 一致):
文本一:
factoryname id
a 1
b 2
c 3
d 4
e 5
文本二:
id address
1 A
2 B
3 C
4 D
5 E
输出效果:
factoryname address
a A
b B
c C
d D
e E
Java 代码:
package com.mxm.day26_1;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job that performs a reduce-side join of two tab-separated tables on a shared id.
 *
 * <p>One table has the columns {@code factoryname, id}; the other has {@code id, address}.
 * The mapper tags every record with the table it came from and keys it by id; the reducer
 * pairs the tagged values of each id and emits {@code factoryname <TAB> address}.
 *
 * <p>Usage: {@code JoinTest2 <input path> <output path>}
 */
public class JoinTest2 extends Configured implements Tool {

    /** Text that identifies the header line of the factory table (also the output header key). */
    private static final String FACTORY_HEADER = "factoryname";

    /** Text that identifies the header line of the address table (also the output header value). */
    private static final String ADDRESS_HEADER = "address";

    /** Column separator of the input files. */
    private static final String FIELD_SEPARATOR = "\t";

    /** Tag prepended to values that originate from the factory table. */
    private static final String FACTORY_TAG = "1";

    /** Tag prepended to values that originate from the address table. */
    private static final String ADDRESS_TAG = "2";

    /** Separator between the table tag and the payload in a map output value. */
    private static final String TAG_SEPARATOR = "-";

    /** Counter group used for diagnostics of this job. */
    private static final String COUNTER_GROUP = "JoinTest2";

    /**
     * Tags each record with its source table and emits it keyed by id.
     *
     * <p>The source table is inferred from the most recent header line this mapper instance
     * has seen, so a header must precede the data lines it describes. Before any header has
     * been seen, records are treated as address records.
     * NOTE(review): this presumably relies on every map task starting at the beginning of a
     * file (small, unsplit inputs) - confirm for large inputs.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

        /** {@code true} while the lines being read belong to the factory table. */
        private boolean readingFactoryTable = false;

        // Reused across map() calls to avoid allocating two objects per record.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Emits {@code (id, "1-" + factoryname)} for factory records and
         * {@code (id, "2-" + address)} for address records. Header lines only switch the
         * current table and produce no output; lines with fewer than two fields are skipped
         * and counted.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();

            // A header line switches the "current table" and is never emitted itself.
            if (line.contains(FACTORY_HEADER)) {
                readingFactoryTable = true;
                return;
            }
            if (line.contains(ADDRESS_HEADER)) {
                readingFactoryTable = false;
                return;
            }

            String[] fields = line.split(FIELD_SEPARATOR);
            if (fields.length < 2) {
                // Blank or malformed line: skip it instead of failing the whole task.
                context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
                return;
            }

            if (readingFactoryTable) {
                // Factory table layout: factoryname <TAB> id
                outKey.set(fields[1]);
                outValue.set(FACTORY_TAG + TAG_SEPARATOR + fields[0]);
            } else {
                // Address table layout: id <TAB> address
                outKey.set(fields[0]);
                outValue.set(ADDRESS_TAG + TAG_SEPARATOR + fields[1]);
            }
            context.write(outKey, outValue);
        }
    }

    /**
     * Joins the tagged values of each id into one {@code factoryname <TAB> address} record.
     *
     * <p>A header record is written once per reducer instance, before the first joined record.
     */
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {

        /** {@code true} once this reducer instance has written the header record. */
        private boolean headerWritten = false;

        /**
         * Writes one output record for the given id. A side that has no value for the id is
         * written as an empty string; if several values carry the same tag, the last one seen
         * wins.
         *
         * @param key     the join id
         * @param value   the tagged values emitted by the mapper for this id
         * @param context task context used to emit output
         */
        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            if (!headerWritten) {
                context.write(new Text(FACTORY_HEADER), new Text(ADDRESS_HEADER));
                headerWritten = true;
            }

            String factoryname = "";
            String address = "";
            for (Text tagged : value) {
                // Limit the split to two parts so that a '-' inside the payload is preserved
                // and an empty payload yields "" rather than a missing array element.
                String[] parts = tagged.toString().split(TAG_SEPARATOR, 2);
                if (parts.length < 2) {
                    continue;
                }
                if (FACTORY_TAG.equals(parts[0])) {
                    factoryname = parts[1];
                } else if (ADDRESS_TAG.equals(parts[0])) {
                    address = parts[1];
                }
            }
            context.write(new Text(factoryname), new Text(address));
        }
    }

    /**
     * Configures and runs the join job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @return {@code 0} if the job succeeded, {@code -1} if it failed or the arguments are missing
     * @throws Exception if the job cannot be created or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: JoinTest2 <input path> <output path>");
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }

        Configuration conf = getConf();
        Job job = Job.getInstance(conf);
        job.setJarByClass(JoinTest2.class);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : -1;
    }

    /**
     * Command-line entry point; exits the JVM with the status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, passed through {@link ToolRunner}
     * @throws Exception if the job fails to start
     */
    public static void main(String[] args) throws Exception {
        int status = ToolRunner.run(new Configuration(), new JoinTest2(), args);
        System.exit(status);
    }
}