一、在(一)中,我们计算出专利被哪些专利所引用,在此基础上计算被引用的次数,不难实现。我们用两种方法来完成。代码(1)如下:
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Counts how many times each patent is cited.
 *
 * Input (via {@link KeyValueTextInputFormat}): each line is
 * {@code patentId<TAB>citingId1,citingId2,...}. The mapper emits the number
 * of comma-separated citing patents per line; the reducer (also used as a
 * combiner, which is safe because summation is associative and commutative)
 * adds them up per patent.
 */
public class PatentCitationsCount extends Configured implements Tool {
    public static class PatentCitationsCountMapper extends Mapper<Text, Text, Text, IntWritable> {
        // Reused across map() calls to avoid per-record allocation (standard Hadoop idiom).
        private final IntWritable length = new IntWritable();

        /**
         * Emits (patentId, count-of-citing-patents) for one input line.
         * Note: split() on an empty value still yields one element, so an
         * empty citation list would be counted as 1 — acceptable only if the
         * input guarantees a non-empty citation field (TODO confirm).
         */
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            String[] array = value.toString().split("[,]");
            length.set(array.length);
            context.write(key, length);
        }
    }

    public static class PatentCitationsCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable sumResult = new IntWritable();

        /** Sums the partial citation counts for one patent. */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable intWritable : values) {
                sum += intWritable.get();
            }
            sumResult.set(sum);
            context.write(key, sumResult);
        }
    }

    /**
     * Configures and submits the job.
     *
     * @return 0 on success, 1 on failure (conventional Tool exit code)
     */
    @Override
    public int run(String[] args) throws Exception {
        Job job = new Job(getConf());
        job.setJarByClass(getClass());
        job.setJobName("patentcitationscount");
        job.setMapperClass(PatentCitationsCountMapper.class);
        job.setReducerClass(PatentCitationsCountReducer.class);
        // Summation is associative, so the reducer doubles as a combiner.
        job.setCombinerClass(PatentCitationsCountReducer.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        // BUG FIX: the original called setOutputKeyClass(TextOutputFormat.class),
        // which registers an OutputFormat as the output KEY class. The intended
        // call is setOutputFormatClass, matching the sibling job below.
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // NOTE(review): paths are hard-coded and "outnput" looks like a typo —
        // confirm against the actual HDFS layout; consider taking them from args.
        FileInputFormat.setInputPaths(job, new Path("/patent/test/input/patentcitationscount.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/patent/test/outnput"));
        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int result = ToolRunner.run(new PatentCitationsCount(), args);
        System.exit(result);
    }
}
代码(2)如下:
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Alternative citation counter: the mapper passes records through unchanged
 * and the reducer splits the comma-separated citation list, summing the
 * lengths per patent. Functionally equivalent to {@code PatentCitationsCount},
 * but the splitting work happens on the reduce side, so no combiner is used.
 */
public class PatentCitationsCountOther extends Configured implements Tool {

    /** Identity mapper: forwards (patentId, citationList) as-is. */
    public static class CountOtherMapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    /** Splits each citation list and accumulates the total count per patent. */
    public static class CountOtherReducer extends
            Reducer<Text, Text, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (Text citationList : values) {
                total += citationList.toString().split("[,]").length;
            }
            result.set(total);
            context.write(key, result);
        }
    }

    /**
     * Wires up and runs the job against the fixed HDFS paths.
     *
     * @return 0 when the job completes successfully, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Job job = new Job(getConf());
        job.setJarByClass(getClass());
        job.setJobName("patentcitationscountother");

        job.setMapperClass(CountOtherMapper.class);
        job.setReducerClass(CountOtherReducer.class);

        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Path inputPath = new Path("/patent/test/input/patentcitationscount.txt");
        Path outputPath = new Path("/patent/test/outnput");
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new PatentCitationsCountOther(), args));
    }
}
两种写法都能实现相同的功能。但哪一种更好?一般来说,代码(1)在 map 端就完成了拆分计数,并且配置了 combiner,可以在本地先合并部分结果,减少 shuffle 阶段传输的数据量;代码(2)把拆分工作推迟到 reduce 端,传输的是原始引用列表,网络开销更大。因此通常代码(1)更优。