第二篇实习日记
手机流量统计项目实现(上)
实现思路:
- 根据手机号进行分组,然后把该手机号对应的上下行流量加起来
- Mapper: 把每行日志中的手机号、上行流量、下行流量拆开,把手机号作为key,把Access(下文代码中的类名为FlowBean)作为value写出去
- Reducer: 接收到的输入形如:("手机号",<Access,Access,...>),把同一手机号的上下行流量累加
- 自定义分区类(需要继承Partitioner抽象类),并覆写getPartition()方法
具体操作:
(1)自定义Access类(下文代码中的类名为FlowBean)
包括属性:上行流量、下行流量、总流量(手机号不保存在该类中,而是作为Map输出的key)
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Traffic figures for one record: upstream flow, downstream flow and their sum.
 *
 * <p>Implements Hadoop's {@link Writable} so it can be used as a MapReduce value.
 * The sum is kept equal to {@code upflow + downflow} by the two-argument
 * constructor, by {@link #set(long, long)} and by the individual
 * {@link #setUpflow(long)} / {@link #setDownflow(long)} setters. Only
 * {@link #setSumflow(long)} and {@link #readFields(DataInput)} can assign a
 * sum directly.
 */
public class FlowBean implements Writable {
    private long upflow;
    private long downflow;
    private long sumflow;

    /**
     * No-argument constructor; required so the bean can later be instantiated
     * via reflection.
     */
    public FlowBean() {
    }

    /**
     * Creates a bean with the given flows; the sum is derived from them.
     *
     * @param upflow   upstream flow
     * @param downflow downstream flow
     */
    public FlowBean(long upflow, long downflow) {
        set(upflow, downflow);
    }

    /**
     * Replaces both flows at once and recomputes the sum.
     *
     * <p>Declared {@code final} because it is called from a constructor.
     *
     * @param upflow   upstream flow
     * @param downflow downstream flow
     */
    public final void set(long upflow, long downflow) {
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = upflow + downflow;
    }

    /**
     * Serializes the three fields.
     *
     * <p>The field order here must match the order used in
     * {@link #readFields(DataInput)}.
     *
     * @param out sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upflow);
        out.writeLong(downflow);
        out.writeLong(sumflow);
    }

    /**
     * Deserializes the three fields, in the same order {@link #write(DataOutput)}
     * emitted them.
     *
     * @param in source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upflow = in.readLong();
        this.downflow = in.readLong();
        this.sumflow = in.readLong();
    }

    /**
     * @return the upstream flow, downstream flow and sum, separated by tabs
     */
    @Override
    public String toString() {
        return upflow + "\t" + downflow + "\t" + sumflow;
    }

    /**
     * Sets the upstream flow and refreshes the sum so it does not go stale.
     *
     * @param upflow upstream flow
     */
    public void setUpflow(long upflow) {
        this.upflow = upflow;
        this.sumflow = this.upflow + this.downflow;
    }

    /** @return the upstream flow */
    public long getUpflow() {
        return upflow;
    }

    /** @return the downstream flow */
    public long getDownflow() {
        return downflow;
    }

    /**
     * Sets the downstream flow and refreshes the sum so it does not go stale.
     *
     * @param downflow downstream flow
     */
    public void setDownflow(long downflow) {
        this.downflow = downflow;
        this.sumflow = this.upflow + this.downflow;
    }

    /** @return the total flow */
    public long getSumflow() {
        return sumflow;
    }

    /**
     * Overrides the total directly. A later call to {@link #set(long, long)},
     * {@link #setUpflow(long)} or {@link #setDownflow(long)} recomputes it.
     *
     * @param sumflow total flow
     */
    public void setSumflow(long sumflow) {
        this.sumflow = sumflow;
    }
}
(2)自定义Map任务类(Map Task)
对每一行日志内容进行拆分,Map输出数据为:
phone==>Access(key为手机号,value为该行手机号的上行流量、下行流量及二者之和;代码中Access对应的类名为FlowBean)
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Splits each tab-separated log line and emits the phone number as key with
 * a {@link FlowBean} holding that line's upstream and downstream flow as value.
 *
 * <p>Column positions read by this mapper: the phone number is the second
 * field, the upstream flow is the third field from the end and the downstream
 * flow is the second field from the end. Lines that are too short for those
 * positions, or whose flow columns are not valid longs, are skipped and
 * counted instead of failing the whole task.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    /** Smallest field count for which indexes 1, length-3 and length-2 all exist. */
    private static final int MIN_FIELDS = 3;
    /** Counter group and name used to report skipped lines. */
    private static final String COUNTER_GROUP = "FlowMapper";
    private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

    // Output key/value objects are created once and reused by every map() call.
    Text k = new Text();
    FlowBean v = new FlowBean();

    /**
     * Parses one input line and writes (phone number, flow bean) to the context.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of the log
     * @param context job context that receives the output pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");

        // Blank or truncated lines would otherwise raise ArrayIndexOutOfBoundsException.
        if (fields.length < MIN_FIELDS) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        long upFlow;
        long downFlow;
        try {
            upFlow = Long.parseLong(fields[fields.length - 3]);
            downFlow = Long.parseLong(fields[fields.length - 2]);
        } catch (NumberFormatException e) {
            // Non-numeric flow column: skip this record rather than abort the task.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        k.set(fields[1]);
        v.set(upFlow, downFlow);
        context.write(k, v);
    }
}