Requirement: from the text file below, derive the grandchild--grandparent relationships.
child parent
Tom Jack
Tom Lily
Jack Lilei
Lilei Some
Lucy Some
huangpeng zhouaiqiong
zhouaiqiong zhoubenzhi
huangpeng qingzhengmao
DD huangpeng
The first line holds the column names; the first column is the child and the second column is the parent. The task is to extract every grandchild---grandparent pair from this raw data.
Approach:
This is a single-table self-join: the table is joined against itself, matching the left copy's parent column to the right copy's child column. In the Map phase every record is emitted twice: once with parent as the key and a left-table tag + child + parent as the value, and once with child as the key and a right-table tag + child + parent as the value. The shuffle phase then automatically groups all values with the same key together, which effectively joins the left copy's parent to the right copy's child. Inside a single reduce call, the child fields from left-table records are the grandchildren and the parent fields from right-table records are the grandparents, so taking the Cartesian product of the two lists yields all grandchild-grandparent pairs.
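To make the grouping concrete, here is a hand trace for the key Jack over the sample data (a sketch of what the shuffle delivers to one reduce call, not actual job output):

Map emits for (Tom, Jack):   key=Jack  value=1+Tom+Jack    (left table: Jack as parent)
Map emits for (Jack, Lilei): key=Jack  value=2+Jack+Lilei  (right table: Jack as child)
Reduce receives:             key=Jack  values=[1+Tom+Jack, 2+Jack+Lilei]
Grandchild list  = [Tom]   (child fields of tag-1 records)
Grandparent list = [Lilei] (parent fields of tag-2 records)
Cartesian product -> Tom  Lilei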
MainClass:
package STjoin;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MainFunc extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        int ret = ToolRunner.run(new MainFunc(), args);
        System.exit(ret);
    }

    @Override
    public int run(String[] args) throws Exception {
        // Job.getInstance replaces the deprecated Job(Configuration) constructor.
        Job job = Job.getInstance(getConf());
        job.setJarByClass(MainFunc.class);
        job.setJobName("STjoin");

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Paths are hardcoded for this example; the output directory must not already exist.
        FileInputFormat.setInputPaths(job, new Path("src/STjoin/guanxi"));
        FileOutputFormat.setOutputPath(job, new Path("rst4"));

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
}
Map:
package STjoin;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Map extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line is "child parent", separated by whitespace.
        String[] fields = value.toString().trim().split("\\s+");
        if (fields.length < 2) {
            return; // skip blank or malformed lines
        }
        String child = fields[0];
        String parent = fields[1];

        // Skip the header row ("child parent").
        if (!child.equals("child")) {
            // Left-table copy: keyed by parent, tagged "1".
            context.write(new Text(parent), new Text("1+" + child + "+" + parent));
            // Right-table copy: keyed by child, tagged "2".
            context.write(new Text(child), new Text("2+" + child + "+" + parent));
        }
    }
}
Reduce:
package STjoin;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reduce extends Reducer<Text, Text, Text, Text> {

    // Writes the header line exactly once; this trick only works with a single reducer.
    private static int time = 0;

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        if (time == 0) {
            context.write(new Text("grandchild"), new Text("grandparent"));
            time++;
        }

        List<String> grandchild = new ArrayList<String>();
        List<String> grandparent = new ArrayList<String>();

        for (Text value : values) {
            // Each value has the form "tag+child+parent".
            String[] record = value.toString().split("\\+");
            if (record.length < 3) {
                continue; // skip malformed records
            }
            if (record[0].equals("1")) {
                // Tag 1: the key is this record's parent, so its child is a grandchild candidate.
                grandchild.add(record[1]);
            } else {
                // Tag 2: the key is this record's child, so its parent is a grandparent candidate.
                grandparent.add(record[2]);
            }
        }

        // The Cartesian product of the two lists yields every grandchild-grandparent
        // pair linked through the key person.
        for (String gc : grandchild) {
            for (String gp : grandparent) {
                context.write(new Text(gc), new Text(gp));
            }
        }
    }
}
Result:
grandchild grandparent
Tom Lilei
Jack Some
DD zhouaiqiong
DD qingzhengmao
huangpeng zhoubenzhi
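As a quick cross-check without a cluster, the same join can be computed in plain Java. This is only a sketch: LocalJoinCheck is a hypothetical standalone class with the sample rows inlined, not part of the MapReduce job.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class LocalJoinCheck {
    public static void main(String[] args) {
        // The nine data rows from the input file, header omitted.
        String[] lines = {
            "Tom Jack", "Tom Lily", "Jack Lilei", "Lilei Some", "Lucy Some",
            "huangpeng zhouaiqiong", "zhouaiqiong zhoubenzhi",
            "huangpeng qingzhengmao", "DD huangpeng"
        };
        Map<String, List<String>> childrenOf = new HashMap<>(); // parent -> children
        Map<String, List<String>> parentsOf = new HashMap<>();  // child  -> parents
        for (String line : lines) {
            String[] f = line.split("\\s+");
            childrenOf.computeIfAbsent(f[1], k -> new ArrayList<>()).add(f[0]);
            parentsOf.computeIfAbsent(f[0], k -> new ArrayList<>()).add(f[1]);
        }
        // A person who appears both as a parent and as a child links a
        // grandchild to a grandparent, exactly like the reduce key above.
        System.out.println("grandchild\tgrandparent");
        for (Map.Entry<String, List<String>> e : childrenOf.entrySet()) {
            List<String> grandparents = parentsOf.get(e.getKey());
            if (grandparents == null) continue;
            for (String gc : e.getValue()) {
                for (String gp : grandparents) {
                    System.out.println(gc + "\t" + gp);
                }
            }
        }
    }
}

Running it prints the same five pairs as the job output above, though the order may differ.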