输入
Tom Lucy
Tom Jack
Jone Locy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Alma
Mark Terry
Mark Alma
输出
grandchild grandparent
Jone Jesse
Jone Alice
Tom Jesse
Tom Alice
Tom Ben
Tom Mary
Mark Alice
Mark Jesse
代码
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Single-table self-join: from "child parent" pairs, derive
 * (grandchild, grandparent) pairs.
 *
 * The mapper emits each input pair twice — once keyed by the parent
 * (tagged "1_" = child record) and once keyed by the child (tagged
 * "2_" = parent record) — so that for every person the reducer sees
 * both that person's children and that person's parents, and can emit
 * their cross product.
 */
public class joinself extends Configured implements Tool{
    /** Counters reported by this job. */
    enum Counter
    {
        /** Input lines skipped because they did not contain two fields. */
        LineSkip,
    }

    /**
     * Guards the one-time "grandchild grandparent" header line.
     * NOTE(review): being a static in the reducer JVM, this only produces a
     * single header when the job runs with one reducer (as in the example).
     */
    public static int time=0;

    /**
     * Mapper: for an input line "child parent" emits
     *   (parent, "1_child")  — child record for the join key, and
     *   (child,  "2_parent") — parent record for the join key.
     */
    public static class map extends Mapper<Object,Text,Text,Text>
    {
        @Override
        public void map(Object key,Text value,Context context)throws IOException,InterruptedException
        {
            String line=value.toString();
            String[] linesplit=line.split(" ");
            // Skip malformed/blank lines instead of throwing
            // ArrayIndexOutOfBoundsException; count them for visibility.
            if(linesplit.length<2)
            {
                context.getCounter(Counter.LineSkip).increment(1);
                return;
            }
            context.write(new Text(linesplit[1]),new Text("1_"+linesplit[0]));
            context.write(new Text(linesplit[0]),new Text("2_"+linesplit[1]));
        }
    }

    /**
     * Reducer: collects, for one person (the key), all of that person's
     * children ("1_" records) and parents ("2_" records), then writes the
     * cross product (child, parent) = (grandchild, grandparent).
     */
    public static class reduce extends Reducer<Text,Text,Text,Text>
    {
        @Override
        public void reduce(Text key,Iterable<Text>value,Context context)throws IOException,InterruptedException
        {
            // Emit the column header exactly once (single-reducer runs).
            if(time==0)
            {
                context.write(new Text("grandchild"),new Text("grandparent"));
                time++;
            }
            // Growable lists instead of the original fixed String[10]
            // arrays, which overflowed once a key had >10 children/parents.
            List<String> children=new ArrayList<String>();
            List<String> parents=new ArrayList<String>();
            // Iterate with for-each; the original called value.iterator()
            // twice per loop pass, which only worked because Hadoop hands
            // back the same underlying iterator each time.
            for(Text v:value)
            {
                // Limit 2 so names containing '_' are not truncated.
                String[] split=v.toString().split("_",2);
                if(split.length<2)
                {
                    continue;
                }
                if("1".equals(split[0]))
                {
                    children.add(split[1]);
                }
                else if("2".equals(split[0]))
                {
                    parents.add(split[1]);
                }
            }
            // Cross product: every child of key paired with every parent
            // of key yields one (grandchild, grandparent) row.
            for(String child:children)
            {
                for(String parent:parents)
                {
                    context.write(new Text(child),new Text(parent));
                }
            }
        }
    }

    /**
     * Configures and submits the join job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure (including bad usage)
     */
    public int run(String[] args)throws Exception
    {
        if(args.length<2)
        {
            System.err.println("Usage: joinself <input path> <output path>");
            return 1;
        }
        Configuration conf=getConf();
        // Job.getInstance replaces the deprecated Job(Configuration,String)
        // constructor; the job is named after the class, not "sort".
        Job job=Job.getInstance(conf,"joinself");
        job.setJarByClass(joinself.class);
        FileInputFormat.addInputPath(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        job.setMapperClass(map.class);
        job.setReducerClass(reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.waitForCompletion(true);
        return job.isSuccessful()?0:1;
    }

    /** CLI entry point: delegates to ToolRunner so -D options are parsed. */
    public static void main(String[] args)throws Exception
    {
        int res=ToolRunner.run(new Configuration(),new joinself(),args);
        System.exit(res);
    }
}