Single-Table Join in Hadoop

The program below performs a self-join on a single child-parent table to find every (grandchild, grandparent) pair: the mapper emits each record twice, keyed once by the parent and once by the child, and the reducer joins the two sides wherever the same person appears as both a child and a parent.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
// Mapper: emits every (child, parent) record twice so that the shuffle
// groups, under each person's name, both that person's children and parents.
//   key = parent, value = "1 <child>"   (the "1" side: children)
//   key = child,  value = "2 <parent>"  (the "2" side: parents)
class Map extends Mapper<Object, Text, Text, Text> {
    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer str = new StringTokenizer(value.toString());
        String[] values = new String[2];
        int i = 0;
        while (str.hasMoreTokens() && i < 2) {
            values[i] = str.nextToken();
            i++;
        }
        // Skip the header line "child parent" and any malformed line.
        if (i == 2 && !values[0].equals("child")) {
            String childName = values[0];
            String parentName = values[1];
            context.write(new Text(parentName), new Text("1 " + childName));
            context.write(new Text(childName), new Text("2 " + parentName));
        }
    }
}
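To see what the mapper produces, take a hypothetical record "Tom Lucy" (Tom is the child, Lucy the parent; the names are illustrative, not from the original post). The mapper emits it twice:

    key = Lucy, value = "1 Tom"    (Tom is one of Lucy's children)
    key = Tom,  value = "2 Lucy"   (Lucy is one of Tom's parents)

After the shuffle, everything known about a given person, children and parents alike, arrives at the reducer under that person's name.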
// Reducer: for each person, separates the incoming values into children
// ("1 ...") and parents ("2 ..."), then emits their Cartesian product as
// (grandchild, grandparent) pairs.
class Reduce extends Reducer<Text, Text, Text, Text> {
    // Written exactly once; this header trick assumes a single reduce task.
    private static int time = 0;

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        if (time == 0) {
            context.write(new Text("grandchild"), new Text("grandparent"));
            time++;
        }
        // Growable lists: a person may have any number of children or parents.
        List<String> grandchild = new ArrayList<String>();
        List<String> grandparent = new ArrayList<String>();
        for (Text value : values) {
            String str = value.toString();
            if (str.charAt(0) == '2') {
                grandparent.add(str.substring(2)); // this key's parent
            } else {
                grandchild.add(str.substring(2));  // this key's child
            }
        }
        // The nested loops emit nothing unless both sides are non-empty.
        for (String child : grandchild) {
            for (String parent : grandparent) {
                context.write(new Text(child), new Text(parent));
            }
        }
    }
}
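Continuing the hypothetical data, suppose the table also contains "Lucy Mary". The key Lucy then receives both "1 Tom" and "2 Mary", so the reducer pairs the two sides and writes:

    Tom    Mary

that is, Tom is Mary's grandchild, because Lucy links the two records. A person who appears only as a child or only as a parent leaves one of the two lists empty and contributes no output.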
public class STjoin {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "single table join");
        job.setJarByClass(STjoin.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);  // our Reduce class, not the base Reducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
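A minimal way to build and run the job, assuming a Hadoop 2.x installation; the jar name and the HDFS paths /user/hadoop/join_in and /user/hadoop/join_out are illustrative:

    $ javac -classpath $(hadoop classpath) STjoin.java
    $ jar cf STjoin.jar *.class
    $ hadoop jar STjoin.jar STjoin /user/hadoop/join_in /user/hadoop/join_out

With a hypothetical input file such as

    child parent
    Tom Lucy
    Tom Jack
    Lucy Mary
    Lucy Ben
    Jack Alice

the job produces (tab-separated; the relative order of values within one key is not guaranteed):

    grandchild  grandparent
    Tom         Alice
    Tom         Mary
    Tom         Ben

Lucy and Jack each appear as both a child and a parent, so Tom is joined to Mary, Ben, and Alice as his grandparents.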