MapReduce Design Patterns-chapter 2

本章探讨MapReduce的总结模式,包括一小时内评论长度的最大、最小、求和及平均值计算,以及中位数和标准差的求解。通过不同方法展示MapReduce在处理这些统计任务中的应用,如利用Combiner优化效率。
摘要由CSDN通过智能技术生成

CHAPTER 2: Summarization Patterns

每个用户首次与最后一次发表评论的时间（最小值、最大值）以及评论总数

/**
 * Value type holding a minimum date, a maximum date and a count.
 *
 * <p>On the wire the tuple is three consecutive {@code long}s: the minimum as
 * epoch milliseconds, the maximum as epoch milliseconds, then the count.
 * {@link #write(DataOutput)} and {@link #readFields(DataInput)} must keep that
 * order in sync.
 *
 * <p>NOTE(review): the formatter below is a shared static
 * {@link SimpleDateFormat}, which is not synchronized; calling
 * {@link #toString()} from several threads at once is not safe.
 */
public class MinMaxCountTuple implements Writable {

    /** Pattern used only for the textual form produced by {@link #toString()}. */
    private final static SimpleDateFormat frmt =
            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

    private Date min = new Date();
    private Date max = new Date();
    private long count = 0;

    // ---- serialization ---------------------------------------------------

    /**
     * Serializes this tuple as min millis, max millis, count.
     *
     * @param out sink to write the three longs to
     * @throws IOException if the underlying stream fails
     */
    public void write(DataOutput out) throws IOException {
        // Dates travel as epoch milliseconds; field order mirrors readFields.
        out.writeLong(this.min.getTime());
        out.writeLong(this.max.getTime());
        out.writeLong(this.count);
    }

    /**
     * Restores this tuple from the three longs produced by {@link #write}.
     * Fresh {@link Date} instances are created for min and max on every call.
     *
     * @param in source positioned at the first long of a serialized tuple
     * @throws IOException if the underlying stream fails
     */
    public void readFields(DataInput in) throws IOException {
        final long minMillis = in.readLong();
        this.min = new Date(minMillis);

        final long maxMillis = in.readLong();
        this.max = new Date(maxMillis);

        this.count = in.readLong();
    }

    // ---- accessors -------------------------------------------------------

    /** @return the current minimum date (the stored reference, not a copy) */
    public Date getMin() {
        return this.min;
    }

    /** @param min the date to store as the minimum (stored by reference) */
    public void setMin(Date min) {
        this.min = min;
    }

    /** @return the current maximum date (the stored reference, not a copy) */
    public Date getMax() {
        return this.max;
    }

    /** @param max the date to store as the maximum (stored by reference) */
    public void setMax(Date max) {
        this.max = max;
    }

    /** @return the current count */
    public long getCount() {
        return this.count;
    }

    /** @param count the count to store */
    public void setCount(long count) {
        this.count = count;
    }

    // ---- text form -------------------------------------------------------

    /**
     * Renders the tuple as {@code <min>\t<max>\t<count>}, with both dates
     * formatted by the class-level pattern.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(frmt.format(this.min));
        text.append("\t");
        text.append(frmt.format(this.max));
        text.append("\t");
        text.append(this.count);
        return text.toString();
    }
}


/**
 * Map side of the per-user min/max/count summarization.
 *
 * <p>For every input record this mapper emits the record's {@code UserId} as
 * the key and a {@link MinMaxCountTuple} whose minimum and maximum are both
 * the record's {@code CreationDate} and whose count is 1.
 *
 * <p>Records that lack either attribute, or whose date does not match the
 * expected pattern, are skipped and tallied in a job counter instead of
 * failing the task.
 */
public static class MinMaxCountMapper extends
    Mapper<Object, Text, Text, MinMaxCountTuple> {

    // Counter group and names used to report skipped records.
    private static final String COUNTER_GROUP = "MinMaxCountMapper";
    private static final String MISSING_FIELD_COUNTER = "MissingUserIdOrCreationDate";
    private static final String BAD_DATE_COUNTER = "UnparseableCreationDate";

    // Output key and value Writables, reused across map() calls.
    private Text outUserId = new Text();
    private MinMaxCountTuple outTuple = new MinMaxCountTuple();

    // Parses the creation date string into a Date. Held per mapper instance
    // rather than as a shared static because SimpleDateFormat is not
    // thread-safe.
    private final SimpleDateFormat frmt =
            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

    /**
     * Emits {@code (UserId, tuple(min=CreationDate, max=CreationDate, count=1))}
     * for one input record.
     *
     * @param key     input key; not used
     * @param value   one input record as text; handed to
     *                {@code transformXmlToMap} (helper defined elsewhere —
     *                presumably it parses the record's XML attributes into a
     *                map; confirm against its definition)
     * @param context task context used to emit output and bump counters
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        Map<String, String> parsed = transformXmlToMap(value.toString());

        // "CreationDate" is the field we take the min and max of;
        // "UserId" is the field we group by.
        String strDate = parsed.get("CreationDate");
        String userId = parsed.get("UserId");

        // Without both fields there is nothing meaningful to emit; parsing a
        // null date or setting a null key would otherwise throw.
        if (strDate == null || userId == null) {
            context.getCounter(COUNTER_GROUP, MISSING_FIELD_COUNTER).increment(1);
            return;
        }

        Date creationDate;
        try {
            creationDate = frmt.parse(strDate);
        } catch (java.text.ParseException e) {
            // A malformed date makes this one record unusable; count it and
            // move on rather than failing the whole task.
            context.getCounter(COUNTER_GROUP, BAD_DATE_COUNTER).increment(1);
            return;
        }

        // A single record's min and max are both its own creation date.
        outTuple.setMin(creationDate);
        outTuple.setMax(creationDate);
        // Each record contributes one comment to the count.
        outTuple.setCount(1);

        // Key the output by user ID.
        outUserId.set(userId);

        // Write out the user ID with its min date, max date and count.
        context.write(outUserId, outTuple);
    }
}


public static class MinMaxCountReducer extends
    Reducer<Text, MinMaxCountTuple, Text, MinMaxCountTuple> {
    // Our output value Writable
    private MinMaxCountTuple result = new MinMaxCountTuple();
    public void reduce(Text key, Iterable<MinMaxCountTuple> values,
            Context context) throws IOException, InterruptedException {
        // Initialize our result
        result.setMin(null);
        result.setMax(null);
        result.setCount(0);
        int sum = 0;
        // Iterate through all input values for this key
        for (MinMaxCountTuple val : values) {
            // If the value's min is less than the result's min
            // Set the result's min to value's
            if (result.getMin() == null ||
			val.getMin().compareTo(result.getMin()) < 0) {
                result.setMin(val.getMin());
            }
            //
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值