输入数据
第一列:child
第二列:parent
输出数据
根据输入数据中的父子关系,输出 grandchild 与 grandparent 的对应关系表格
第一列:grandchild
第二列:grandparent
代码
package com.test.Correlation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
//import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class TableCorrelation {
/**
 * Mapper for the child/parent self-join.
 *
 * <p>Each input line is split on whitespace into a child (first token) and a
 * parent (second token). Every record is emitted twice so that the reducer
 * sees, under one key, both the children and the parents of that person:
 * <ul>
 *   <li>key = parent, value = {@code "0 <child>"}</li>
 *   <li>key = child,  value = {@code "2 <parent>"}</li>
 * </ul>
 */
public static class TableCorrelationMapper extends Mapper<Object, Text, Text, Text> {
    /** Tag prefix marking a value that holds a child of the key. */
    private static final String CHILD_TAG = "0";
    /** Tag prefix marking a value that holds a parent of the key. */
    private static final String PARENT_TAG = "2";
    /** First-column text of the header row, which must not be joined. */
    private static final String HEADER_CHILD = "child";

    /**
     * Emits the two tagged records for one input line.
     *
     * @param key     input key supplied by the input format; not used
     * @param value   one line of input text
     * @param context sink for the tagged records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        // Blank, whitespace-only, or single-column lines carry no relation;
        // skip them instead of failing on nextToken().
        if (tokens.countTokens() < 2) {
            return;
        }
        String child = tokens.nextToken();
        String parent = tokens.nextToken();
        if (HEADER_CHILD.equals(child)) {
            return;
        }
        context.write(new Text(parent), new Text(CHILD_TAG + " " + child));
        context.write(new Text(child), new Text(PARENT_TAG + " " + parent));
    }
}
/**
 * Reducer for the child/parent self-join.
 *
 * <p>For one key (a person) the values are tagged strings: {@code "0 <name>"}
 * is a child of the key and {@code "2 <name>"} is a parent of the key. Every
 * child of the key is a grandchild of every parent of the key, so the reducer
 * writes the cross product of the two groups.
 *
 * <p>A {@code grandchild / grandparent} header row is written once per reducer
 * instance, before the first key is processed.
 */
public static class TableCorrelationReducer extends Reducer<Text, Text, Text, Text> {
    /** Tag character marking a value that holds a child of the key. */
    private static final String CHILD_TAG = "0";
    /** Tag character marking a value that holds a parent of the key. */
    private static final String PARENT_TAG = "2";

    /** Whether this reducer instance has already written the header row. */
    private boolean headerWritten = false;

    /**
     * Writes all (grandchild, grandparent) pairs that pass through {@code key}.
     *
     * @param key     the person linking the two generations
     * @param values  tagged child/parent names for that person
     * @param context sink for the output pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Emit the header and then continue: the first key must still be joined.
        if (!headerWritten) {
            context.write(new Text("grandchild"), new Text("grandparent"));
            headerWritten = true;
        }

        // Growable lists: a person may have any number of children or parents.
        List<String> grandchildren = new ArrayList<String>();
        List<String> grandparents = new ArrayList<String>();
        for (Text val : values) {
            StringTokenizer tokens = new StringTokenizer(val.toString());
            if (tokens.countTokens() < 2) {
                continue; // not a "<tag> <name>" value; nothing to join
            }
            String tag = tokens.nextToken();
            String name = tokens.nextToken();
            if (CHILD_TAG.equals(tag)) {
                grandchildren.add(name);
            } else if (PARENT_TAG.equals(tag)) {
                grandparents.add(name);
            }
        }

        // If either list is empty the loops below produce no output.
        for (String grandchild : grandchildren) {
            for (String grandparent : grandparents) {
                context.write(new Text(grandchild), new Text(grandparent));
            }
        }
    }
}
/**
 * Configures and runs the TableCorrelation job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: TableCorrelation <input path> <output path>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    // NOTE(review): newer Hadoop releases prefer Job.getInstance(conf, name);
    // confirm the target Hadoop version before switching.
    Job job = new Job(conf, "TableCorrelation");
    job.setJarByClass(TableCorrelation.class);
    job.setMapperClass(TableCorrelationMapper.class);
    job.setReducerClass(TableCorrelationReducer.class);

    // Mapper and reducer both emit Text/Text.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}