4. Map phase
(1) Read one line of data and split it into fields
(2) Extract the phone number, the upstream traffic, and the downstream traffic (a hypothetical sample line is shown after this list)
(3) Output the phone number as the key and the bean object as the value, i.e. context.write(phone number, bean)
(4) For the bean object to be transmitted between Map and Reduce, it must implement the serialization interface
5. Reduce phase
(1) Accumulate the upstream and downstream traffic to obtain the total traffic
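The Mapper below assumes a tab-separated input record roughly like the following (a hypothetical sample line; the actual log layout may differ), in which field 1 is the phone number and the last three fields are the upstream traffic, the downstream traffic, and a status code:
1   13736230513   192.196.100.1   www.atguigu.com   2481   24681   200
For such a line the map phase would emit the key 13736230513 with a FlowBean carrying upFlow = 2481 and downFlow = 24681.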
1. Write the Bean object for the traffic statistics
package com.atguigu.mr.flowsum;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
public class FlowBean implements Writable {
private long upFlow;   // upstream traffic
private long downFlow; // downstream traffic
private long sumFlow;  // total traffic
// no-arg constructor (required so the framework can instantiate the bean by reflection during deserialization)
public FlowBean() {
super();
}
public FlowBean(long upFlow, long downFlow) {
super();
this.upFlow = upFlow;
this.downFlow = downFlow;
sumFlow = downFlow + upFlow;
}
// serialization method
@Override
public void write(DataOutput out) throws IOException {
out.writeLong(upFlow);
out.writeLong(downFlow);
out.writeLong(sumFlow);
}
// deserialization method
@Override
public void readFields(DataInput in) throws IOException {
// must read the fields in exactly the same order they were written
upFlow = in.readLong();
downFlow = in.readLong();
sumFlow = in.readLong();
}
@Override
public String toString() {
return upFlow + "\t" + downFlow + "\t" + sumFlow;
}
public long getUpFlow() {
return upFlow;
}
public void setUpFlow(long upFlow) {
this.upFlow = upFlow;
}
public long getDownFlow() {
return downFlow;
}
public void setDownFlow(long downFlow) {
this.downFlow = downFlow;
}
public long getSumFlow() {
return sumFlow;
}
public void setSumFlow(long sumFlow) {
this.sumFlow = sumFlow;
}
public void set(long sum_upFlow, long sum_downFlow) {
upFlow = sum_upFlow;
downFlow = sum_downFlow;
sumFlow = sum_upFlow + sum_downFlow;
}
}
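As a quick sanity check of the Writable contract, the following standalone sketch (FlowBeanRoundTrip is a hypothetical helper, not part of the tutorial code) serializes a FlowBean into a byte array and reads it back; this only works because readFields consumes the three longs in the same order that write produced them.
package com.atguigu.mr.flowsum;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
public class FlowBeanRoundTrip {
public static void main(String[] args) throws IOException {
FlowBean original = new FlowBean(1116, 954); // hypothetical upFlow and downFlow values
// serialize: write() emits upFlow, then downFlow, then sumFlow into the byte stream
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
original.write(new DataOutputStream(bytes));
// deserialize: readFields() must consume the three longs in the same order
FlowBean copy = new FlowBean();
copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
System.out.println(copy); // expected: 1116	954	2070
}
}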
2. Write the Mapper class
package com.atguigu.mr.flowsum;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
Text k = new Text();
FlowBean v = new FlowBean();
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// get one line
String line = value.toString();
// split on the tab character
String[] fields = line.split("\t");
// populate the key and the value bean
k.set(fields[1]);
long upFlow = Long.parseLong(fields[fields.length - 3]);
long downFlow = Long.parseLong(fields[fields.length - 2]);
v.setUpFlow(upFlow);
v.setDownFlow(downFlow);
// v.set(upFlow,downFlow);
// write out
context.write(k, v);
}
}
3. Write the Reducer class
package com.atguigu.mr.flowsum;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
FlowBean v = new FlowBean();
@Override
protected void reduce(Text key, Iterable<FlowBean> values, Context context)
throws IOException, InterruptedException {
long sum_upFlow = 0;
long sum_downFlow = 0;
// accumulate the sums
for (FlowBean flowBean : values) {
sum_upFlow += flowBean.getUpFlow();
sum_downFlow += flowBean.getDownFlow();
}
v.set(sum_upFlow, sum_downFlow);
// write out
context.write(key, v);
}
}
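To make the reduce logic concrete, here is a minimal standalone sketch (FlowCountReduceSketch and the traffic numbers are hypothetical, not from the tutorial) that applies the same accumulation as FlowCountReducer to two FlowBean records belonging to a single phone number:
package com.atguigu.mr.flowsum;
import java.util.Arrays;
import java.util.List;
public class FlowCountReduceSketch {
public static void main(String[] args) {
// two hypothetical records for the same phone number
List<FlowBean> values = Arrays.asList(new FlowBean(2481, 24681), new FlowBean(1116, 954));
long sumUpFlow = 0;
long sumDownFlow = 0;
// same accumulation loop as in FlowCountReducer
for (FlowBean bean : values) {
sumUpFlow += bean.getUpFlow();
sumDownFlow += bean.getDownFlow();
}
FlowBean result = new FlowBean();
result.set(sumUpFlow, sumDownFlow);
System.out.println(result); // expected: 3597	25635	29232
}
}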
4. Write the Driver class
package com.atguigu.mr.flowsum;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;