MapReduce 的 WordCount 求 Top N

/**
 * Word-count mapper.
 *
 * <p>Splits every input line on tab characters and emits {@code (token, 1)}
 * for each non-empty token.
 */
public class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Output key, reused across calls so no object is allocated per record. */
    private final Text outkey = new Text();

    /** Constant count of 1 emitted for every token occurrence. */
    private final IntWritable outval = new IntWritable(1);

    /**
     * Tokenizes one input line and writes one {@code (token, 1)} pair per token.
     *
     * @param key     input key supplied by the input format (not used here)
     * @param value   one line of input text; fields are separated by {@code \t}
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // The input file uses a tab as its field separator.
        for (String token : value.toString().split("\t")) {
            // split() yields "" for blank lines and for leading/consecutive tabs;
            // counting those would make the empty string a "word".
            if (token.isEmpty()) {
                continue;
            }
            outkey.set(token);
            context.write(outkey, outval);
        }
    }

}

/**
 * Word-count reducer that emits only the N most frequent words.
 *
 * <p>{@link #reduce} sums the counts of each word and stores the total in an
 * in-memory map; nothing is written until {@link #cleanup}, which sorts the
 * collected totals in descending order and writes the first N entries.
 * Every distinct key seen by this reducer instance is therefore held in
 * memory until cleanup runs.
 */
public class MyReducer extends Reducer<Text, IntWritable, Text, LongWritable> {

    /** Configuration property that overrides how many entries are emitted. */
    private static final String TOP_N_KEY = "wordcount.top.n";

    /** Number of entries emitted when {@link #TOP_N_KEY} is not set. */
    private static final int DEFAULT_TOP_N = 5;

    /** Output value, reused for every record written in cleanup. */
    private final LongWritable outval = new LongWritable();

    /** Word -> total count, filled by reduce() and consumed by cleanup(). */
    private final Map<String, Long> map = new HashMap<String, Long>();

    /** Output key, reused for every record written in cleanup (visibility kept as in the original). */
    Text outkey = new Text();

    /**
     * Sums all counts for one word and records the total; emits nothing.
     *
     * @param key     the word being reduced
     * @param values  the partial counts for that word
     * @param context unused here; output is deferred to {@link #cleanup}
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long sum = 0L;
        for (IntWritable count : values) {
            sum += count.get();
        }
        // toString() snapshots the key's current contents as an immutable String.
        map.put(key.toString(), sum);
    }

    /**
     * Sorts the collected totals and writes the top N {@code (word, count)} pairs.
     *
     * <p>N is read from the job configuration property {@code wordcount.top.n}
     * and defaults to 5. If fewer than N distinct words were seen, all of them
     * are written. Words with equal counts are ordered by the word itself so
     * the output does not depend on hash-map iteration order.
     *
     * @param context sink for the emitted pairs and source of the configuration
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void cleanup(Reducer<Text, IntWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        int topN = context.getConfiguration().getInt(TOP_N_KEY, DEFAULT_TOP_N);

        // Copy the entries into a list so they can be sorted with Collections.sort.
        LinkedList<Map.Entry<String, Long>> list =
                new LinkedList<Map.Entry<String, Long>>(map.entrySet());

        Collections.sort(list, new Comparator<Map.Entry<String, Long>>() {

            @Override
            public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
                // Descending by count; ascending by word to break ties deterministically.
                int byCount = o2.getValue().compareTo(o1.getValue());
                return byCount != 0 ? byCount : o1.getKey().compareTo(o2.getKey());
            }
        });

        // Iterate instead of list.get(i): it is bounded by the list size (no
        // IndexOutOfBoundsException when there are fewer than N words) and
        // avoids the per-call traversal of LinkedList.get.
        int emitted = 0;
        for (Map.Entry<String, Long> entry : list) {
            if (emitted >= topN) {
                break;
            }
            outkey.set(entry.getKey());
            outval.set(entry.getValue());
            context.write(outkey, outval);
            emitted++;
        }
    }

}

public class MyDriver {
//这里呢,用了main方法,也可以继承Configured 实现Tool接口
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, “myWordCount”);

	job.setJarByClass(MyDriver.class);
	job.setMapperClass(MyMapper.class);
	job.setReducerClass(MyReducer.class);
	
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(IntWritable.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(LongWritable.class);
	
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	Path in = new Path (args[0]);
	Path out = new Path (args[1]);
	

	FileSystem fs = FileSystem.get(conf);
	if(fs.exists(out)){
		fs.delete(out,true);
		System.out.println("目录已干掉!!!");
	}
	

FileInputFormat.addInputPath(job, in);
FileOutputFormat.setOutputPath(job, out);


	long startTime = System.currentTimeMillis();
	boolean con = job.waitForCompletion(true);
	long endTime = System.currentTimeMillis();
	
	if(con){
		System.out.println("job_status: ok!!!");
	}else{
		System.out.println("job_status :fail!!!");
	}
	System.out.println("运行时间"+(endTime-startTime)/1000+"s");
}
	
}
  • 0
    点赞
  • 0
    评论
  • 1
    收藏
  • 打赏
    打赏
  • 扫一扫,分享海报

©️2022 CSDN 皮肤主题:游动-白 设计师:我叫白小胖 返回首页

打赏作者

Z113014

你的鼓励将是我创作的最大动力

¥2 ¥4 ¥6 ¥10 ¥20
输入1-500的整数
余额支付 (余额:-- )
扫码支付
扫码支付:¥2
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、C币套餐、付费专栏及课程。

余额充值