MapReduce综合练习之上网流量统计

最新推荐文章于 2022-02-09 10:04:07 发布

黄道婆

最新推荐文章于 2022-02-09 10:04:07 发布

阅读量331

点赞数

分类专栏： bigdata 文章标签： mapreduce

本文链接：https://blog.csdn.net/elizabethxxy/article/details/108761508

版权

该博客介绍了使用MapReduce进行上网流量统计的三个任务：1. 统计每个手机号的上行和下行流量总和；2. 上行流量倒序排序；3. 手机号按特定规则分区。通过自定义MapReduce的Mapper、Reducer和分区类实现这些功能。

摘要由CSDN通过智能技术生成

====
MapReduce综合练习之上网流量统计

需求一：统计求和
统计每个手机号的上行流量总和、下行流量总和、上行总流量之和、下行总流量之和。
分析：以手机号码作为key，以上行流量、下行流量、上行总流量、下行总流量四个字段作为value，然后将这组key和value作为map阶段的输出、reduce阶段的输入。
代码定义如下：
第一步：自定义map的输出value对象FlowBean
package cn.itcast.mr.demo3.flow;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Map-output value holding four traffic counters: up flow, down flow,
 * up count flow and down count flow.
 *
 * <p>The bean is serialized as four consecutive ints, in the field order
 * declared below; {@link #readFields(DataInput)} reads them back in the same
 * order.
 *
 * <p>All counters start at 0, so a freshly constructed bean can be serialized
 * or printed even if not every setter has been called. Passing {@code null}
 * to a setter is still not supported: {@link #write(DataOutput)} would throw a
 * {@link NullPointerException} when unboxing that field.
 */
public class FlowBean implements Writable {
    // Initialized to 0 (rather than left null) so write() never unboxes a null.
    private Integer upFlow = 0;
    private Integer downFlow = 0;
    private Integer upCountFlow = 0;
    private Integer downCountFlow = 0;

    /**
     * Serializes the four counters as ints.
     *
     * @param out the sink to write to
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(upFlow);
        out.writeInt(downFlow);
        out.writeInt(upCountFlow);
        out.writeInt(downCountFlow);
    }

    /**
     * Deserializes the four counters, in the same order {@link #write(DataOutput)} emits them.
     *
     * @param in the source to read from
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readInt();
        this.downFlow = in.readInt();
        this.upCountFlow = in.readInt();
        this.downCountFlow = in.readInt();
    }

    /** @return the up flow counter */
    public Integer getUpFlow() {
        return upFlow;
    }

    /** @param upFlow the up flow counter */
    public void setUpFlow(Integer upFlow) {
        this.upFlow = upFlow;
    }

    /** @return the down flow counter */
    public Integer getDownFlow() {
        return downFlow;
    }

    /** @param downFlow the down flow counter */
    public void setDownFlow(Integer downFlow) {
        this.downFlow = downFlow;
    }

    /** @return the up count flow counter */
    public Integer getUpCountFlow() {
        return upCountFlow;
    }

    /** @param upCountFlow the up count flow counter */
    public void setUpCountFlow(Integer upCountFlow) {
        this.upCountFlow = upCountFlow;
    }

    /** @return the down count flow counter */
    public Integer getDownCountFlow() {
        return downCountFlow;
    }

    /** @param downCountFlow the down count flow counter */
    public void setDownCountFlow(Integer downCountFlow) {
        this.downCountFlow = downCountFlow;
    }

    /**
     * Renders the four counters separated by tabs, in declaration order.
     *
     * @return {@code upFlow \t downFlow \t upCountFlow \t downCountFlow}
     */
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + upCountFlow + "\t" + downCountFlow;
    }
}

第二步：定义FlowMapper类
package cn.itcast.mr.demo3.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that turns one tab-separated input line into a
 * (phone number, {@link FlowBean}) pair.
 *
 * <p>Field 1 of the line is used as the output key; fields 6, 7, 8 and 9 are
 * parsed as ints and stored as the bean's up flow, down flow, up count flow
 * and down count flow respectively.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    /** Index of the field used as the output key (the phone number). */
    private static final int PHONE_INDEX = 1;
    /** Index of the first of the four consecutive counter fields. */
    private static final int UP_FLOW_INDEX = 6;
    /** Fields 0..9 are addressed, so a usable record has at least this many fields. */
    private static final int MIN_FIELDS = 10;

    /**
     * Parses one input line and emits its phone number with the four counters.
     *
     * <p>Lines with fewer than {@value #MIN_FIELDS} tab-separated fields (for
     * example a blank line) are skipped instead of failing the task with an
     * {@link ArrayIndexOutOfBoundsException}; each skip increments the
     * {@code FlowMapper / MALFORMED_RECORDS} counter so it stays visible.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of tab-separated input
     * @param context the task context used to emit output and counters
     * @throws IOException           if emitting the output pair fails
     * @throws InterruptedException  if the task is interrupted while emitting
     * @throws NumberFormatException if any of the four counter fields is not a valid int
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        if (fields.length < MIN_FIELDS) {
            // Too short to hold the phone number and all four counters.
            context.getCounter("FlowMapper", "MALFORMED_RECORDS").increment(1);
            return;
        }

        FlowBean flowBean = new FlowBean();
        flowBean.setUpFlow(Integer.parseInt(fields[UP_FLOW_INDEX]));
        flowBean.setDownFlow(Integer.parseInt(fields[UP_FLOW_INDEX + 1]));
        flowBean.setUpCountFlow(Integer.parseInt(fields[UP_FLOW_INDEX + 2]));
        flowBean.setDownCountFlow(Integer.parseInt(fields[UP_FLOW_INDEX + 3]));

        context.write(new Text(fields[PHONE_INDEX]), flowBean);
    }
}

第三步：定义FlowReducer类
package cn.itcast.mr.demo3.flow;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowReducer extends Reducer<Text,FlowBean,Text,FlowBean> {

@Override
protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
int upFlow = 0;
int downFlow = 0;
int upCountFlow = 0;
int downCountFlow = 0;
for (FlowBean value : values) {
upFlow += value.getUpFlow();
downFlow += value.getDownFlow();
upCountFlow += value.getUpCountFlow();
downCountFlow += value.getDownCountFlow();