package my.hadoopstudy;
import java.util.*;
import java.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
/**
 * MapReduce job that joins two tagged tables on a shared key (reduce-side join).
 *
 * <p>Input is read with {@link KeyValueTextInputFormat} using a single space as
 * the key/value separator, so each line is split into a key (the text before
 * the first space) and a value (the rest of the line). The first character of
 * the value is a tag: {@code '1'} marks a record of the first table, any other
 * character marks a record of the second table. For every key the reducer
 * emits one output pair per combination of first-table and second-table
 * records, with the tag characters stripped.
 */
public class TableConnect {

    /** Tag character that marks a value as belonging to the first table. */
    private static final char FIRST_TABLE_TAG = '1';

    /**
     * Identity mapper: forwards every (key, value) pair unchanged so that the
     * shuffle groups all records sharing a key in one reduce call.
     */
    public static class tableMapper extends Mapper<Text, Text, Text, Text>
    {
        /**
         * Writes the incoming pair to the output without modification.
         *
         * @param key     the join key parsed from the input line
         * @param value   the tagged record parsed from the input line
         * @param context the task context used to emit the pair
         * @throws IOException          if the framework fails to write the pair
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Text key, Text value, Context context) throws IOException, InterruptedException
        {
            context.write(key, value);
        }
    }

    /**
     * Joins the records of one key: splits the values by their tag character
     * and emits the cross product of the two groups.
     */
    public static class tableReducer extends Reducer<Text, Text, Text, Text>
    {
        /**
         * Emits (first-table record, second-table record) for every pairing of
         * the values received for {@code key}. Nothing is emitted when either
         * group is empty.
         *
         * @param key     the join key shared by all {@code values}
         * @param values  tagged records; the first character is the table tag
         * @param context the task context used to emit joined pairs
         * @throws IOException          if the framework fails to write a pair
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
        {
            List<String> gChildren = new ArrayList<String>();
            List<String> gParents = new ArrayList<String>();
            for (Text val : values)
            {
                // Copy to a String right away; the list must hold stable values
                // for the cross product below.
                String tagged = val.toString();
                if (tagged.isEmpty())
                {
                    // An input line without a separator produces an empty value;
                    // it carries no tag, so skip it instead of failing on charAt(0).
                    continue;
                }
                // Strip the leading tag character before storing the record.
                String record = tagged.substring(1);
                if (tagged.charAt(0) == FIRST_TABLE_TAG)
                {
                    gChildren.add(record); // record of the first table
                }
                else
                {
                    gParents.add(record); // any other tag: record of the second table
                }
            }
            for (String child : gChildren)
            {
                for (String parent : gParents)
                {
                    context.write(new Text(child), new Text(parent));
                }
            }
        }
    }

    /**
     * Configures and runs the join job, then exits with status 0 on success
     * or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path; additional arguments are ignored
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length < 2)
        {
            System.err.println("Usage: TableConnect <input path> <output path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        // Split each input line into key and value at the first space.
        conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "tableConnect");
        job.setJarByClass(TableConnect.class);
        job.setMapperClass(tableMapper.class);
        job.setReducerClass(tableReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
输入数据,文本1:
代号 公司
a 1Beijing Red Star
c 1Shenzhen Thunder
b 1Guangzhou Honda
a 1Beijing Rising
b 1Guangzhou Development Bank
c 1Tencent
a 1Back of Beijing
输入数据,文本2:
代号 地方
a 2Beijing
b 2Guangzhou
c 2Shenzhen
d 2Xian
输出: