MapReduce设计模式:求最小值、最大值和计数

MapReduce设计模式"求最小值、最大值和计数"的完整实现如下:

MinMaxCount  类如下:

package com;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;

public class MinMaxCount extends Configured implements Tool{

public static class MinMaxCountMapper extends Mapper<Object,Text,Text,MinMaxCountTuple>{
private MinMaxCountTuple outTuple = new MinMaxCountTuple();
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
String strValue = value.toString();
String[] arrValue = strValue.split("\t");
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Date date;
try {
date = sdf.parse(arrValue[1]);
outTuple.setMax(date);
outTuple.setMin(date);
outTuple.setCount(1);
Text UserID = new Text(arrValue[0]);
context.write(UserID, outTuple);
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}

public static class MinMaxCountReducer extends Reducer<Text, MinMaxCountTuple,Text,MinMaxCountTuple>{
private MinMaxCountTuple result = new MinMaxCountTuple();
public void reduce(Text key, Iterable<MinMaxCountTuple> values,Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
result.setMax(null);
result.setMin(null);
  result.setCount(0);
int sum = 0;
for(MinMaxCountTuple val : values)
{
if(result.getMin() == null || val.getMin().compareTo(result.getMin())< 0 )
{
result.setMin(val.getMin());
}
if(result.getMax() == null || val.getMax().compareTo(result.getMax())>0)
{
result.setMax(val.getMax());
}
sum+=val.getCount();
}
result.setCount(sum);
context.write(key, result);
}
}

@Override
public int run(String[] arg0) throws Exception {
// TODO Auto-generated method stub
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "MinMaxCount");
job.setJarByClass(MinMaxCount.class);
job.setMapperClass(MinMaxCountMapper.class);
job.setReducerClass(MinMaxCountReducer.class);
job.setCombinerClass(MinMaxCountReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(MinMaxCountTuple.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(MinMaxCountTuple.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path("hdfs://192.168.26.131:9000/design/minmaxcount/in/minmaxcount"));
Path out_path = new Path("hdfs://192.168.26.131:9000/design/minmaxcount/out");
FileSystem.get(conf).delete(out_path, true);
FileOutputFormat.setOutputPath(job, out_path);
//job.setNumReduceTasks(0);
job.waitForCompletion(true);
return 0;
}

}

MinMaxCountTuple 类如下:

package com;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.io.Writable;

public class MinMaxCountTuple implements Writable {
private Date min = new Date();
private Date max = new Date();
private long count = 0;
public Date getMin() {
return min;
}
public void setMin(Date min) {
this.min = min;
}
public Date getMax() {
return max;
}
public void setMax(Date max) {
this.max = max;
}
public long getCount() {
return count;
}
public void setCount(long count) {
this.count = count;
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
 System.out.println("readFields readFields readFields");
        min = new Date(in.readLong());
        max = new Date(in.readLong());
        count = in.readLong();
}
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
System.out.println("write write write");
out.writeLong(min.getTime());
out.writeLong(max.getTime());
out.writeLong(count);
}
public String toString(){
System.out.println("toString toString toString");
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
return sdf.format(min)+"\t"+sdf.format(max)+"\t"+count;
}
}

测试函数类如下:

package com;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class Test_MinMaxCount {
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
ToolRunner.run(new Configuration(), new MinMaxCount(), args); 
//ToolRunner.run(new Configuration(), new Test(), args); 
}
}

输入的测试数据如下(字段之间以制表符 \t 分隔,与 Mapper 中的 split("\t") 对应):

1001 2012-10-12 10:11:12
1002 2013-2-9 3:9:8
1002 2013-9-10 10:3:21
1001 2013-2-9 13:19:18
1003 2011-1-19 12:21:12
1003 2015-9-10 11:3:2
1003 2012-2-19 12:21:12
1003 2015-12-11 17:3:12
1002 2011-12-9 13:9:13
1002 2001-9-10 10:3:2
1001 2013-12-19 3:1:1

程序输出结果为:

1001 2012-10-12 10:11:12 2013-12-19 03:01:01 3
1002 2001-09-10 10:03:02 2013-09-10 10:03:21 4
1003 2011-01-19 12:21:12 2015-12-11 17:03:12 4

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

风逝老大

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值