Hadoop 统计专利被哪些专利所引用(一)

一、以下是测试数据:

"CITING","CITED"
3858241,956203
3858241,1324234
3858241,3398406
3858241,3557384
3858241,3634889
3858242,1515701
3858242,3319261
3858242,3668705
3858242,3707004
3858243,2949611
3858243,3146465
3858243,3156927
3858243,3221341
3858243,3574238
3858243,3681785
3858243,3684611
3858244,14040
3858244,17445
3858245,17445

 注:第一列(CITING)是引用方的专利号,第二列(CITED)是被它引用的专利号。

二、Hadoop 代码如下:

 

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job that inverts a patent citation list.
 *
 * <p>Each input line has the form {@code CITING,CITED}. The job emits one line
 * per cited patent, followed by a comma-separated list of the patents that cite it.
 *
 * <p>Usage: {@code PatentCitations [inputPath [outputPath]]}. Any path that is
 * omitted falls back to the built-in default.
 */
public class PatentCitations extends Configured implements Tool {

	/** Input path used when no first argument is given. */
	private static final String DEFAULT_INPUT_PATH = "/patent/test/input/file1.txt";

	/** Output path used when no second argument is given. */
	private static final String DEFAULT_OUTPUT_PATH = "/patent/test/output";

	/** Name under which the job is submitted. */
	private static final String JOB_NAME = "patentcitations";

	/** Separator between the two columns of an input line and between joined values. */
	private static final String FIELD_SEPARATOR = ",";

	/**
	 * Swaps key and value of every record so that the cited patent becomes the
	 * key the records are grouped by.
	 */
	public static class PatentCitationsMapper extends Mapper<Text, Text, Text, Text> {

		/**
		 * Emits the incoming pair reversed.
		 *
		 * @param key     first column of the input line (the citing patent)
		 * @param value   second column of the input line (the cited patent)
		 * @param context task context the reversed pair is written to
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void map(Text key, Text value, Context context)
				throws IOException, InterruptedException {
			context.write(value, key);
		}
	}

	/**
	 * Joins all values received for one key into a single comma-separated string.
	 */
	public static class PatentCitationsReduces extends Reducer<Text, Text, Text, Text> {

		// Reused for every output record; kept per instance (not static) so that
		// reducer instances never share mutable state.
		private final Text joinedValues = new Text();

		/**
		 * Writes {@code key} together with the comma-joined {@code values}, in the
		 * order the iterable yields them.
		 *
		 * @param key     the cited patent
		 * @param values  the patents citing {@code key}
		 * @param context task context the joined record is written to
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			StringBuilder sb = new StringBuilder();
			for (Text value : values) {
				if (sb.length() > 0) {
					sb.append(FIELD_SEPARATOR);
				}
				sb.append(value.toString());
			}
			joinedValues.set(sb.toString());
			context.write(key, joinedValues);
		}

	}

	/**
	 * Configures and runs the job, blocking until it finishes.
	 *
	 * @param args optional {@code [inputPath [outputPath]]}; missing entries fall
	 *             back to {@link #DEFAULT_INPUT_PATH} and {@link #DEFAULT_OUTPUT_PATH}
	 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
	 * @throws Exception if the job cannot be set up or submitted
	 */
	@Override
	public int run(String[] args) throws Exception {
		Configuration conf = getConf();
		if (conf == null) {
			// getConf() is null when the tool is not launched through ToolRunner.
			conf = new Configuration();
			setConf(conf);
		}
		// Must be set before the Job is created: the job is built from this configuration.
		conf.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, FIELD_SEPARATOR);

		Job job = Job.getInstance(conf, JOB_NAME);
		job.setJarByClass(getClass());

		job.setInputFormatClass(KeyValueTextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		job.setMapperClass(PatentCitationsMapper.class);
		job.setReducerClass(PatentCitationsReduces.class);

		String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
		String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;
		FileInputFormat.setInputPaths(job, new Path(inputPath));
		FileOutputFormat.setOutputPath(job, new Path(outputPath));

		boolean success = job.waitForCompletion(true);
		return success ? 0 : 1;
	}

	/**
	 * Command-line entry point; exits the JVM with the job's status code.
	 *
	 * @param args forwarded to {@link ToolRunner}, which hands the remaining
	 *             arguments to {@link #run(String[])}
	 * @throws Exception if the job fails to run
	 */
	public static void main(String[] args) throws Exception {
		int result = ToolRunner.run(new PatentCitations(), args);
		System.exit(result);
	}

}

 三、执行结果如下:

 

 

"CITED"	"CITING"
1324234	3858241
14040	3858244
1515701	3858242
17445	3858245,3858244
2949611	3858243
3146465	3858243
3156927	3858243
3221341	3858243
3319261	3858242
3398406	3858241
3557384	3858241
3574238	3858243
3634889	3858241
3668705	3858242
3681785	3858243
3684611	3858243
3707004	3858242
956203	3858241

 注:17445 分别被 3858245,3858244 所引用。

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值