数据如下
1363157982040 13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 y0.ifengimg.com 综合门户 57 102 7335 110349 200
1363157986072 18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 input.shouji.sogou.com 搜索引擎 21 18 9531 2412 200
1363157990043 13925057413 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 t3.baidu.com 搜索引擎 69 63 11058 48243 200
需求:统计文件中每个用户所耗费的总上行流量,总下行流量,总流量
要点:自定义类型实现hadoop的序列化接口
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/*
*
*
* 自定义数据类型如何实现hadoop序列化接口
* 1、该类一定要保留空参构造函数
* 2、write方法中输出字段二进制数据的顺序要与readFieds方法读取数据的顺序一致
*
* */
/**
 * Custom Hadoop value type carrying per-user traffic counters:
 * upstream flow, downstream flow, and their total.
 *
 * <p>Rules for implementing Hadoop's {@link Writable} serialization:
 * <ol>
 *   <li>A public no-arg constructor MUST exist — the framework instantiates
 *       the bean reflectively before calling {@link #readFields(DataInput)}.</li>
 *   <li>The field order written in {@link #write(DataOutput)} MUST match the
 *       order read in {@link #readFields(DataInput)} exactly.</li>
 * </ol>
 */
public class FlowBean implements Writable {
	private int upFlow;      // upstream (sent) bytes
	private int dFlow;       // downstream (received) bytes
	private String phone;    // subscriber phone number (record key in the job)
	private int amountFlow;  // derived: upFlow + dFlow

	/** No-arg constructor required by the Writable deserialization contract. */
	public FlowBean() {
	}

	/**
	 * Builds a fully-initialized bean; the total is derived from the two parts.
	 *
	 * @param phone  subscriber phone number
	 * @param upFlow upstream byte count
	 * @param dFlow  downstream byte count
	 */
	public FlowBean(String phone, int upFlow, int dFlow) {
		this.phone = phone;  // fixed: original assigned phone twice
		this.upFlow = upFlow;
		this.dFlow = dFlow;
		this.amountFlow = upFlow + dFlow;
	}

	public int getUpFlow() {
		return upFlow;
	}

	public void setUpFlow(int upFlow) {
		this.upFlow = upFlow;
	}

	public int getdFlow() {
		return dFlow;
	}

	public void setdFlow(int dFlow) {
		this.dFlow = dFlow;
	}

	public String getPhone() {
		return phone;
	}

	public void setPhone(String phone) {
		this.phone = phone;
	}

	public int getAmountFlow() {
		return amountFlow;
	}

	public void setAmountFlow(int amountFlow) {
		this.amountFlow = amountFlow;
	}

	/** Comma-separated form used as the job's textual output: phone,up,down,total. */
	@Override
	public String toString() {
		return this.phone + "," + this.upFlow + "," + this.dFlow + "," + this.amountFlow;
	}

	/** Serializes fields in the fixed order: phone, upFlow, dFlow, amountFlow. */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(phone);
		out.writeInt(upFlow);
		out.writeInt(dFlow);
		out.writeInt(amountFlow);
	}

	/** Deserializes fields in the same order {@link #write(DataOutput)} emitted them. */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.phone = in.readUTF();
		this.upFlow = in.readInt();
		this.dFlow = in.readInt();
		this.amountFlow = in.readInt();
	}
}
然后是MapReduce主程序