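// Input file format: each line is "X,Y", meaning that patent X cites patent Y.
// This program inverts the citation data: for each patent, it finds the patents that cite it.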
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.google.inject.Key;
import com.sun.tracing.dtrace.ArgsAttributes;
/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that inverts citation pairs.
 *
 * <p>Input lines are read with {@link KeyValueTextInputFormat} using {@code ","} as the
 * key/value separator. The mapper swaps key and value, and the reducer joins all values that
 * share a key into one comma-separated list. For an input of {@code citing,cited} pairs the
 * output therefore maps each cited id to the ids that cite it.
 */
public class MyJob extends Configured implements Tool {

    /** Mapper that emits every input record with its key and value swapped. */
    public static class MapClass extends MapReduceBase implements Mapper<Text, Text, Text, Text> {

        /**
         * Emits {@code (value, key)} for the given {@code (key, value)} record.
         *
         * @param key      the part of the input line before the separator
         * @param value    the part of the input line after the separator
         * @param output   collector that receives the swapped pair
         * @param reporter unused
         * @throws IOException if the collector fails to accept the pair
         */
        @Override
        public void map(Text key, Text value, OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
            output.collect(value, key);
        }
    }

    /** Reducer that joins all values of a key into a single comma-separated {@link Text}. */
    public static class Reduce extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

        /**
         * Concatenates every value for {@code key}, separated by commas, and emits the result.
         *
         * @param key      the key shared by all values
         * @param value    iterator over the values grouped under {@code key}
         * @param output   collector that receives {@code (key, joined values)}
         * @param reporter unused
         * @throws IOException if the collector fails to accept the pair
         */
        @Override
        public void reduce(Text key, Iterator<Text> value, OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
            // A StringBuilder keeps the join linear in the total output size; repeated
            // String concatenation would re-copy the accumulated list for every value.
            StringBuilder joined = new StringBuilder();
            while (value.hasNext()) {
                // The separator is written only once something has been accumulated.
                if (joined.length() > 0) {
                    joined.append(',');
                }
                joined.append(value.next().toString());
            }
            output.collect(key, new Text(joined.toString()));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param arg0 command-line arguments: {@code arg0[0]} is the input path and
     *             {@code arg0[1]} is the output path
     * @return {@code 0} after the job has run, or {@code -1} if fewer than two arguments
     *         were supplied
     * @throws Exception if job submission or execution throws
     */
    @Override
    public int run(String[] arg0) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (arg0 == null || arg0.length < 2) {
            System.err.println("Usage: MyJob <input path> <output path>");
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }

        Configuration configuration = getConf();
        JobConf job = new JobConf(configuration, MyJob.class);

        FileInputFormat.setInputPaths(job, new Path(arg0[0]));
        FileOutputFormat.setOutputPath(job, new Path(arg0[1]));

        job.setJobName("InvertedIndex");
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormat(KeyValueTextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input lines are comma-separated rather than using the input format's default separator.
        job.set("key.value.separator.in.input.line", ",");

        JobClient.runJob(job);
        return 0;
    }

    /**
     * Command-line entry point: runs the job through {@link ToolRunner} and exits with the
     * code returned by {@link #run(String[])}.
     *
     * @param args command-line arguments passed to {@link ToolRunner}
     * @throws Exception if the tool throws
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new MyJob(), args);
        System.exit(res);
    }
}