1. A practice problem found online; solving it needs a custom Bean object as the key to implement the sorting step.
Write a MapReduce program that finds which table is accessed most frequently during the peak period (9-10 o'clock).
Problem statement:
Use Hadoop to analyze a large volume of log files, where each line records the following fields:
TableName, Time, User, TimeSpan (time cost)
Write a MapReduce program that determines, for the peak period (e.g. 9-10 o'clock), which table is accessed most frequently, which user accesses that table the most during that period, and that user's total time cost on the table.
Step 1 is to find the table with the largest number of accesses between 9 and 10 o'clock.
Sample input (TableName, Time, User, TimeSpan):
==========================================================
t003 6:00 u002 180
t003 7:00 u002 180
t003 7:08 u002 180
t003 7:25 u002 180
t002 8:00 u002 180
t001 8:00 u001 240
t001 9:00 u002 300
t001 9:11 u001 240
t003 9:26 u001 180
t001 9:39 u001 300
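Working through the sample data by hand: only the last four records fall in the 9-10 o'clock window. Table t001 is accessed 3 times and t003 once, so t001 is the busiest table; within t001, user u001 appears twice (240 + 300 = 540 total time cost) against once for u002. The expected final output of the two jobs below is therefore:

t001  u001  2  540

that is, table t001, its most active user u001, 2 accesses by that user, and a total time cost of 540.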
2. The MapReduce code
package com.wangs.Max;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Fangwen {
/**
* Mapper of job 1: keep only records that fall in the 9-10 o'clock window
* and re-key them by table name, e.g.
* t001 9:00 u002 300 -> key: t001, value: "u002\t300"
*/
public static class M1 extends Mapper<LongWritable, Text, Text, Text> {
@Override
protected void map(LongWritable key, Text value,
Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
String[] split = value.toString().split(" ");
// keep only accesses whose time falls in the 9-10 o'clock window
if (split[1].startsWith("9") || split[1].equals("10:00")) {
context.write(new Text(split[0]), new Text(split[2] + "\t"
+ split[3]));
}
}
}
/**
* Reducer of job 1: for each table, receives all (user, timeSpan) pairs, e.g.
* t001: [u002 300, u001 150, ...]
*/
public static class R1 extends Reducer<Text, Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> values,
Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
int count = 0;// total number of times this table was accessed
Map<String, Integer> ucounts = new HashMap<>();// access count per user
Map<String, Integer> utimes = new HashMap<>();// total time cost per user
for (Text t : values) {// e.g. u002 300
String[] split = t.toString().split("\t");
String uname = split[0];// user name, e.g. u002
int utime = Integer.valueOf(split[1]);// time cost, e.g. 300
if (ucounts.get(uname) == null) {
ucounts.put(uname, 1);
utimes.put(uname, utime);
} else {
int newCount = ucounts.get(uname) + 1;
ucounts.put(uname, newCount);
int newTime = utimes.get(uname) + utime;// accumulated time cost
utimes.put(uname, newTime);
}
count += 1;
}
// At this point we have the table's total access count,
// each user's access count, and each user's total time cost.
int maxcount = Integer.MIN_VALUE;
Iterator<Integer> iterator = ucounts.values().iterator();
while (iterator.hasNext()) {
Integer c = iterator.next();
if (c > maxcount)
maxcount = c;
}
// maxcount now holds the largest per-user access count
Iterator<String> i2 = ucounts.keySet().iterator();
while (i2.hasNext()) {
String uname = i2.next();
if (ucounts.get(uname) == maxcount) {
// output: tableName / table access count / user / user's access count / user's total time cost
context.write(key, new Text(count + "\t" + uname + "\t"
+ maxcount + "\t" + utimes.get(uname)));
}
}
}
}
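/**
* Mapper of job 2: reads job 1's output lines
* (tableName \t tableCount \t user \t userCount \t userTime) and wraps the
* table's access count in a Bean key so the framework sorts the records by
* that count (descending, see Bean.compareTo).
*/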
public static class M2 extends Mapper<LongWritable, Text, Bean, Text> {
@Override
protected void map(LongWritable key, Text value,
Mapper<LongWritable, Text, Bean, Text>.Context context)
throws IOException, InterruptedException {
String[] split = value.toString().split("\t");
Bean b = new Bean();
b.setTcount(Integer.valueOf(split[1]));
context.write(b, new Text(split[0] + "\t" + split[2] + "\t"
+ split[3] + "\t" + split[4]));
}
}
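/**
* Reducer of job 2: because Bean keys arrive in descending order of tcount,
* the first key seen carries the maximum access count; only records whose
* count equals that maximum are written out. This relies on the default
* single reduce task so that every key passes through the same reducer.
*/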
public static class R2 extends Reducer<Bean, Text, Text, NullWritable> {
int tcountMax = Integer.MIN_VALUE;
@Override
protected void reduce(Bean key, Iterable<Text> values,
Reducer<Bean, Text, Text, NullWritable>.Context context)
throws IOException, InterruptedException {
if (key.getTcount() >= tcountMax) {
tcountMax = key.getTcount();
for (Text t : values) {
context.write(t, NullWritable.get());
}
}
}
}
public static void main(String[] args) throws Exception {
// create job 1: per-table aggregation
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(Fangwen.class);
job.setMapperClass(M1.class);
job.setReducerClass(R1.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
Path path = new Path(args[1]);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(path))
fs.delete(path, true);
FileOutputFormat.setOutputPath(job, path);
boolean b = job.waitForCompletion(true);// wait for job 1 to finish
if (b) {// if the aggregation job succeeded, start the sort job
Job job2 = Job.getInstance(conf);
job2.setJarByClass(Fangwen.class);
job2.setMapperClass(M2.class);
job2.setReducerClass(R2.class);
job2.setMapOutputKeyClass(Bean.class);
job2.setMapOutputValueClass(Text.class);
job2.setOutputKeyClass(Text.class);
job2.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job2, new Path(args[1]));
Path path2 = new Path(args[2]);
if (fs.exists(path2))
fs.delete(path2, true);
FileOutputFormat.setOutputPath(job2, path2);
job2.waitForCompletion(true);// wait for job 2 to finish
}
}
}
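The driver chains two jobs and expects three path arguments: the raw log directory (args[0]), an intermediate directory for job 1's output (args[1], which is also job 2's input), and the final output directory (args[2]). A minimal launch sketch; the jar name and HDFS paths are placeholders, not taken from the original post:

hadoop jar fangwen.jar com.wangs.Max.Fangwen /logs/input /logs/mid /logs/result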
3. The Bean class
package com.wangs.Max;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
public class Bean implements WritableComparable<Bean> {
private int tcount;
public int getTcount() {
return tcount;
}
public void setTcount(int tcount) {
this.tcount = tcount;
}
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(tcount);
}
@Override
public void readFields(DataInput in) throws IOException {
tcount = in.readInt();
}
@Override
public int compareTo(Bean o) {
// sort in descending order of tcount; Integer.compare avoids overflow of plain subtraction
return Integer.compare(o.getTcount(), tcount);
}
}
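Because compareTo compares o.tcount against this object's tcount, Beans sort in descending order of tcount, which is exactly what lets R2 treat the first key it receives as the maximum. A quick local check of that ordering; BeanSortDemo is a hypothetical helper class, not part of the original code:

package com.wangs.Max;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class BeanSortDemo {
    public static void main(String[] args) {
        Bean small = new Bean();
        small.setTcount(1);
        Bean big = new Bean();
        big.setTcount(3);
        List<Bean> beans = new ArrayList<>();
        beans.add(small);
        beans.add(big);
        Collections.sort(beans); // uses Bean.compareTo
        for (Bean bean : beans) {
            System.out.println(bean.getTcount()); // prints 3, then 1 (descending)
        }
    }
}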