1. A practice problem found online; solving it needs a custom Bean object as the key to implement the sorting step.
Write a MapReduce program that finds which table is accessed most frequently during the peak period (9-10 o'clock).
Problem statement:
Use Hadoop to analyze a large volume of log files, where each line records the following fields:
TableName, Time, User, TimeSpan (time cost)
Write a MapReduce program that determines, for the peak period (e.g. 9-10 o'clock), which table is accessed most frequently, which user accesses that table the most during that period, and that user's total time cost on the table.
Step 1 is to find the table with the largest number of accesses between 9 and 10 o'clock.
Sample input (TableName, Time, User, TimeSpan):
==========================================================
t003 6:00 u002 180
t003 7:00 u002 180
t003 7:08 u002 180
t003 7:25 u002 180
t002 8:00 u002 180
t001 8:00 u001 240
t001 9:00 u002 300
t001 9:11 u001 240
t003 9:26 u001 180
t001 9:39 u001 300
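Working through the sample data by hand: only the last four records fall in the 9-10 o'clock window. Table t001 is accessed 3 times and t003 once, so t001 is the busiest table; within t001, user u001 appears twice (240 + 300 = 540 total time cost) against once for u002. The expected final output of the two jobs below is therefore:

t001  u001  2  540

that is, table t001, its most active user u001, 2 accesses by that user, and a total time cost of 540.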
2. The MapReduce code
package com.wangs.Max;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Fangwen {
/**
* Mapper of job 1: keep only records that fall in the 9-10 o'clock window
* and re-key them by table name, e.g.
* t001 9:00 u002 300 -> key: t001, value: "u002\t300"
*/
public static class M1 extends Mapper<LongWritable, Text, Text, Text> {
@Override
protected void map(LongWritable key, Text value,
Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String[] split = value.toString().split(" ");
// keep only accesses whose time falls in the 9-10 o'clock window
if (split[1].startsWith("9") || split[1].equals("10:00")) {
context.write(new Text(split[0]), new Text(split[2] + "\t"
+ split[3]));
}
}
}
/**
* Reducer of job 1: for each table, receives all (user, timeSpan) pairs, e.g.
* t001: [u002 300, u001 150, ...]
*/
public static class R1 extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> values,
Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
int count = 0;// total number of times this table was accessed
Map<String, Integer> ucounts = new HashMap<>();// access count per user
Map<String, Integer> utimes = new HashMap<>();// total time cost per user
for (Text t : values) {// e.g. u002 300
String[] split = t.toString().split("\t");
String uname = split[0];// user name, e.g. u002
int utime = Integer.valueOf(split[1]);// time cost, e.g. 300
if (ucounts.get(uname) == null) {
ucounts.put(uname, 1);
utimes.put(uname, utime);
} else {
int newCount = ucounts.get(uname) + 1;
ucounts.put(uname, newCount);
int newTime = utimes.get(uname) + utime;// accumulated time cost
utimes.put(uname, newTime);
}
count += 1;
}
// At this point we have the table's total access count,
// each user's access count, and each user's total time cost.
int maxcount = Integer.MIN_VALUE;
Iterator<Integer> iterator = ucounts.values().iterator();
while (iterator.hasNext()) {
Integer c = iterator.next();
if (c > maxcount)
maxcount = c;
}
// maxcount now holds the largest per-user access count
Iterator<String> i2 = ucounts.keySet().iterator();
while (i2.hasNext()) {
String uname = i2.next();
if (ucounts.get(uname) == maxcount) {
// output: tableName / table access count / user / user's access count / user's total time cost
context.write(key, new Text(count + "\t" + uname + "\t"
+ maxcount + "\t" + utimes.get(uname)));
}
}
}
}
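/**
* Mapper of job 2: reads job 1's output lines
* (tableName \t tableCount \t user \t userCount \t userTime) and wraps the
* table's access count in a Bean key so the framework sorts the records by
* that count (descending, see Bean.compareTo).
*/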
public static class M2 extends Mapper<LongWritable, Text, Bean, Text> {
@Override
protected void map(LongWritable key, Text value,
Mapper<LongWritable, Text, Bean, Text>.Context context)
throws IOException, InterruptedException {
String[] split = value.toString().split("\t");
Bean b = new Bean();
b.setTcount(Integer.valueOf(split[1]));
context.write(b, new Text(split[0] + "\t" + split[2] + "\t"
+ split[3] + "\t" + split[4]));
}
}
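/**
* Reducer of job 2: because Bean keys arrive in descending order of tcount,
* the first key seen carries the maximum access count; only records whose
* count equals that maximum are written out. This relies on the default
* single reduce task so that every key passes through the same reducer.
*/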
public static class R2 extends Reducer<Bean, Text, Text, NullWritable> {
int tcountMax = Integer.MIN_VALUE;
@Override
protected void reduce(Bean key, Iterable<Text> values,
Reducer<Bean, Text, Text, NullWritable>.Context context)
throws IOException, InterruptedException {
if (key.getTcount() >= tcountMax) {
tcountMax = key.getTcount();
for (Text t : values) {
context.write(t, NullWritable.get());
}
}
}
}
public static void main(String[] args) throws Exception {
// create job 1: per-table aggregation
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(Fangwen.class);
job.setMapperClass(M1.class);
job.setReducerClass(R1.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
Path path = new Path(args[1]);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(path))
fs.delete(path, true);
FileOutputFormat.setOutputPath(job, path);
boolean b = job.waitForCompletion(true);// wait for job 1 to finish
if (b) {// if the aggregation job succeeded, start the sort job
Job job2 = Job.getInstance(conf);
job2.setJarByClass(Fangwen.class);
job2.setMapperClass(M2.class);
job2.setReducerClass(R2.class);
job2.setMapOutputKeyClass(Bean.class);
job2.setMapOutputValueClass(Text.class);
job2.setOutputKeyClass(Text.class);
job2.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job2, new Path(args[1]));
Path path2 = new Path(args[2]);
if (fs.exists(path2))
fs.delete(path2, true);
FileOutputFormat.setOutputPath(job2, path2);
job2.waitForCompletion(true);// wait for job 2 to finish
}
}
}
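The driver chains two jobs and expects three path arguments: the raw log directory (args[0]), an intermediate directory for job 1's output (args[1], which is also job 2's input), and the final output directory (args[2]). A minimal launch sketch; the jar name and HDFS paths are placeholders, not taken from the original post:

hadoop jar fangwen.jar com.wangs.Max.Fangwen /logs/input /logs/mid /logs/result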
3. The Bean class
package com.wangs.Max;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class Bean implements WritableComparable<Bean> {
private int tcount;
public int getTcount() {
return tcount;
}
public void setTcount(int tcount) {
this.tcount = tcount;
}
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(tcount);
}
@Override
public void readFields(DataInput in) throws IOException {
tcount = in.readInt();
}
@Override
public int compareTo(Bean o) {
// sort in descending order of tcount; Integer.compare avoids overflow of plain subtraction
return Integer.compare(o.getTcount(), tcount);
}
}
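Because compareTo compares o.tcount against this object's tcount, Beans sort in descending order of tcount, which is exactly what lets R2 treat the first key it receives as the maximum. A quick local check of that ordering; BeanSortDemo is a hypothetical helper class, not part of the original code:

package com.wangs.Max;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class BeanSortDemo {
    public static void main(String[] args) {
        Bean small = new Bean();
        small.setTcount(1);
        Bean big = new Bean();
        big.setTcount(3);
        List<Bean> beans = new ArrayList<>();
        beans.add(small);
        beans.add(big);
        Collections.sort(beans); // uses Bean.compareTo
        for (Bean bean : beans) {
            System.out.println(bean.getTcount()); // prints 3, then 1 (descending)
        }
    }
}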