第五个MapReduce程序----标准版(TopKey)

今天突然发现博客里竟然一直没有写过一个完整、规范的 MapReduce 程序,这里补上一个:统计各 URL 的流量,并输出占总流量前 80% 的网站。

Mapper

package club.drguo.hadoop.mapreduce.topkurl;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Extracts per-URL traffic from tab-separated log lines.
 *
 * <p>For every line that has more than 32 fields and whose field 26 starts with
 * {@code "http"}, emits {@code (url, FlowBean(up_flow, down_flow))} where the
 * upstream and downstream byte counts are read from fields 30 and 31.
 * Lines that do not match are skipped; lines whose flow fields are not valid
 * longs are skipped and counted in the {@code MALFORMED_FLOW_FIELDS} counter.
 */
public class TopKeyURLMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
	/** Index of the URL field. */
	private static final int URL_INDEX = 26;
	/** Index of the upstream-flow field. */
	private static final int UP_FLOW_INDEX = 30;
	/** Index of the downstream-flow field. */
	private static final int DOWN_FLOW_INDEX = 31;
	/**
	 * A line must have MORE than this many fields to be processed.
	 * NOTE(review): the highest index read is 31, so {@code > 31} would suffice;
	 * the original threshold is kept in case it deliberately filters short records.
	 */
	private static final int MIN_FIELD_COUNT_EXCLUSIVE = 32;

	// Reused across map() calls to avoid allocating a new object per record.
	private FlowBean bean = new FlowBean();
	private Text k = new Text();

	/**
	 * Parses one log line and emits its URL together with its up/down flow.
	 *
	 * @param key     input key supplied by the framework (not used)
	 * @param value   one tab-separated log line
	 * @param context task context used to emit output and update counters
	 * @throws IOException          if writing the output record fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, FlowBean>.Context context)
			throws IOException, InterruptedException {
		// NOTE(review): commons-lang StringUtils.split treats adjacent separators as one,
		// so empty columns are dropped and later indices shift — confirm the indices
		// above were chosen with that in mind.
		String[] fields = StringUtils.split(value.toString(), "\t");
		if (fields.length <= MIN_FIELD_COUNT_EXCLUSIVE) {
			return;
		}

		String url = fields[URL_INDEX];
		if (StringUtils.isEmpty(url) || !url.startsWith("http")) {
			return;
		}

		long upFlow;
		long downFlow;
		try {
			upFlow = Long.parseLong(fields[UP_FLOW_INDEX]);
			downFlow = Long.parseLong(fields[DOWN_FLOW_INDEX]);
		} catch (NumberFormatException e) {
			// Only bad numeric fields are tolerated; I/O failures from context.write
			// must propagate so the framework can fail/retry the task.
			context.getCounter("TopKeyURLMapper", "MALFORMED_FLOW_FIELDS").increment(1);
			System.err.println("topkeyurlmapper有问题。。。。。" + e.getMessage());
			return;
		}

		k.set(url);
		bean.set(upFlow, downFlow);
		context.write(k, bean);
	}
}


Reducer

package club.drguo.hadoop.mapreduce.topkurl;

import java.io.IOException;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Sums the traffic of each URL and, once all keys have been reduced, writes the
 * URLs that make up the first 80% of the total traffic seen by this reducer.
 *
 * <p>Nothing is written from {@code reduce}; all output happens in {@code cleanup}.
 */
public class TopKeyURLReducer extends Reducer<Text, FlowBean, Text, LongWritable>{
	// Kept as a field so that a single map accumulates every URL; declaring it
	// inside reduce() would create a new map for each URL.
	// NOTE(review): ordering and key equality come from FlowBean.compareTo, which is
	// not visible here — if it can return 0 for two URLs, the later put() replaces
	// the earlier URL. Confirm against FlowBean.
	private TreeMap<FlowBean, Text> urlsByFlow = new TreeMap<>();
	// Running total of the flow of every URL handled by this reducer instance.
	private double totalFlow = 0;

	/**
	 * Aggregates all flow records of one URL and stores the totals for cleanup().
	 *
	 * @param key     the URL
	 * @param values  every FlowBean emitted for that URL
	 * @param context task context (not written to here)
	 */
	@Override
	protected void reduce(Text key, Iterable<FlowBean> values, Context context)
			throws IOException, InterruptedException {
		// Snapshot the key's text up front and store a fresh Text built from it.
		String urlString = key.toString();
		long upTotal = 0;
		long downTotal = 0;
		for (FlowBean record : values) {
			upTotal += record.getUp_flow();
			System.out.println("--------------"+upTotal);
			downTotal += record.getDown_flow();
		}

		FlowBean urlTotal = new FlowBean(upTotal, downTotal);
		totalFlow += urlTotal.getSum_flow();
		System.out.println("=============="+totalFlow);
		// The TreeMap keeps entries sorted by FlowBean's own ordering.
		urlsByFlow.put(urlTotal, new Text(urlString));
	}

	/**
	 * Called once when the reduce task is about to finish (setup() is the
	 * counterpart called once at the start). Walks the accumulated URLs in map
	 * order and writes (url, total flow) until the written share reaches 80%.
	 *
	 * @param context task context used to write the final output
	 */
	@Override
	protected void cleanup(Reducer<Text, FlowBean, Text, LongWritable>.Context context)
			throws IOException, InterruptedException {
		double writtenFlow = 0;
		for (Entry<FlowBean, Text> entry : urlsByFlow.entrySet()) {
			// Stop as soon as the share already written is no longer below 80%.
			// Written as a negated "<" so a NaN ratio (0/0) also stops the loop.
			if (!(writtenFlow / totalFlow < 0.8)) {
				return;
			}
			long flow = entry.getKey().getSum_flow();
			context.write(entry.getValue(), new LongWritable(flow));
			writtenFlow += flow;
		}
	}
}


Runner

package club.drguo.hadoop.mapreduce.topkurl;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
//club.drguo.hadoop.mapreduce.topkurl.TopKeyURLRunner
/**
 * Driver that configures and submits the top-URL job
 * ({@link TopKeyURLMapper} + {@link TopKeyURLReducer}).
 *
 * <p>Usage: {@code TopKeyURLRunner [generic options] [input [output]]}. When the
 * paths are omitted the original hard-coded HDFS locations are used.
 */
public class TopKeyURLRunner extends Configured implements Tool{
	/** Input path used when no argument is given. */
	private static final String DEFAULT_INPUT = "hdfs://ns1/flow/srclog";
	/** Output path used when fewer than two arguments are given. */
	private static final String DEFAULT_OUTPUT = "hdfs://ns1/flow/topkurl";

	/**
	 * Builds the job and waits for it to finish.
	 *
	 * @param args optional {@code [input [output]]} paths remaining after
	 *             ToolRunner has consumed the generic options
	 * @return 0 if the job succeeded, 1 otherwise
	 * @throws Exception if the job cannot be configured or submitted
	 */
	@Override
	public int run(String[] args) throws Exception {
		// Use the configuration injected by ToolRunner so generic options
		// (-D, -conf, -fs, ...) take effect; fall back to a fresh one only when
		// run() is invoked without setConf() having been called.
		Configuration configuration = getConf() != null ? getConf() : new Configuration();
		Job job = Job.getInstance(configuration);
		job.setJarByClass(TopKeyURLRunner.class);

		job.setMapperClass(TopKeyURLMapper.class);
		job.setReducerClass(TopKeyURLReducer.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		// TopKeyURLReducer accumulates the total flow per reducer instance, so the
		// 80% cut-off is only global when exactly one reducer sees every URL.
		job.setNumReduceTasks(1);

		String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
		String output = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;
		FileInputFormat.setInputPaths(job, input);
		FileOutputFormat.setOutputPath(job, new Path(output));

		return job.waitForCompletion(true)?0:1;
	}

	/**
	 * Entry point: runs the job through ToolRunner and exits with its status code.
	 *
	 * @param args command-line arguments (generic options followed by optional paths)
	 * @throws Exception if the job fails to run
	 */
	public static void main(String[] args) throws Exception {
		int res = ToolRunner.run(new Configuration(), new TopKeyURLRunner(), args);
		System.exit(res);
	}
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

光于前裕于后

您的打赏将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值