java中mapreduce求和,mapreduce【流量统计】求和——自定义数据类型

需求:统计日志文件中每一个用户(以手机号标识)所耗费的总上行流量、总下行流量和总流量(总流量 = 上行流量 + 下行流量)

1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	24	27	2481	24681	200

1363157995052	13826544101	5C-0E-8B-C7-F1-E0:CMCC	120.197.40.4	4	0	264	0	200

1363157991076	13926435656	20-10-7A-28-CC-0A:CMCC	120.196.100.99	2	4	132	1512	200

1363154400022	13926251106	5C-0E-8B-8B-B1-50:CMCC	120.197.40.4	4	0	240	0	200

1363157993044	18211575961	94-71-AC-CD-E6-18:CMCC-EASY	120.196.100.99	iface.qiyi.com	视频网站	15	12	1527	2106	200

1363157995074	84138413	5C-0E-8B-8C-E8-20:7DaysInn	120.197.40.4	122.72.52.12	20	16	4116	1432	200

1363157993055	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99	18	15	1116	954	200

1363157995033	15920133257	5C-0E-8B-C7-BA-20:CMCC	120.197.40.4	sug.so.360.cn	信息安全	20	20	3156	2936	200

1363157983019	13719199419	68-A1-B7-03-07-B1:CMCC-EASY	120.196.100.82	4	0	240	0	200

1363157984041	13660577991	5C-0E-8B-92-5C-20:CMCC-EASY	120.197.40.4	s19.cnzz.com	站点统计	24	9	6960	690	200

1363157973098	15013685858	5C-0E-8B-C7-F7-90:CMCC	120.197.40.4	rank.ie.sogou.com	搜索引擎	28	27	3659	3538	200

1363157986029	15989002119	E8-99-C4-4E-93-E0:CMCC-EASY	120.196.100.99	www.umeng.com	站点统计	3	3	1938	180	200

1363157992093	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99	15	9	918	4938	200

1363157986041	13480253104	5C-0E-8B-C7-FC-80:CMCC-EASY	120.197.40.4	3	3	180	180	200

1363157984040	13602846565	5C-0E-8B-8B-B6-00:CMCC	120.197.40.4	2052.flash2-http.qq.com	综合门户	15	12	1938	2910	200

1363157995093	13922314466	00-FD-07-A2-EC-BA:CMCC	120.196.100.82	img.qfc.cn	12	12	3008	3720	200

1363157982040	13502468823	5C-0A-5B-6A-0B-D4:CMCC-EASY	120.196.100.99	y0.ifengimg.com	综合门户	57	102	7335	110349	200

1363157986072	18320173382	84-25-DB-4F-10-1A:CMCC-EASY	120.196.100.99	input.shouji.sogou.com	搜索引擎	21	18	9531	2412	200

1363157990043	13925057413	00-1F-64-E1-E6-9A:CMCC	120.196.100.55	t3.baidu.com	搜索引擎	69	63	11058	48243	200

1363157988072	13760778710	00-FD-07-A4-7B-08:CMCC	120.196.100.82	2	2	120	120	200

1363157985066	13726238888	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	24	27	2481	24681	200

1363157993055	13560436666	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99	18	15	1116	954	200

思路:map阶段:将每一行按tab切分成各字段,提取其中的手机号(第2个字段)作为输出key,将上行流量(倒数第3个字段)和下行流量(倒数第2个字段)封装到FlowBean对象中,作为输出的value

要点:自定义类型如何实现Hadoop的序列化接口

FlowBean:这种自定义数据类型必须实现Hadoop的序列化接口:Writable

实现其中的两个方法:

1.readFields(in)——反序列化方法

2.write(out)——序列化方法

reduce阶段:遍历一组数据的所有value(flowbean),进行累加,然后以手机号作为key输出,以总流量信息bean作为value输出。

代码实现

1.FlowBean

import org.apache.hadoop.io.Writable;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

/**

* 本案例功能:演示自定义数据类型如何实现Hadoop的序列化接口

* 1,该类一定要保留空参构造器

* 2.write方法中输出字段二进制数据的顺序要与readFiles方法读取数据的顺序一致

*/

public class FlowBean implements Writable {

private int upFlow;

private int dFlow;

private String phone;

private int amountFlow;

public int getUpFlow() {

return upFlow;

}

public void setUpFlow(int upFlow) {

this.upFlow = upFlow;

}

public int getdFlow() {

return dFlow;

}

public void setdFlow(int dFlow) {

this.dFlow = dFlow;

}

public int getAmountFlow() {

return amountFlow;

}

public void setAmountFlow(int amountFlow) {

this.amountFlow = amountFlow;

}

public FlowBean() {

}

public FlowBean(int upFlow, int dFlow,String phone) {

this.upFlow = upFlow;

this.dFlow = dFlow;

this.phone=phone;

this.amountFlow=upFlow+dFlow;

}

/**

* hadoop 系统在序列化该类的对象时要调用得方法

* @param dataOutput

* @throws IOException

*/

public void write(DataOutput dataOutput) throws IOException {

dataOutput.writeInt(upFlow);

dataOutput.writeUTF(phone);

dataOutput.writeInt(dFlow);

dataOutput.writeInt(amountFlow);

}

/**

* hadoop系统在反序列化时要调用的方法

* @param dataInput

* @throws IOException

*/

public void readFields(DataInput dataInput) throws IOException {

this.upFlow=dataInput.readInt();

this.phone=dataInput.readUTF();

this.dFlow=dataInput.readInt();

this.amountFlow=dataInput.readInt();

}

@Override

public String toString() {

return this.upFlow+","+this.dFlow+","+this.amountFlow;

}

}

2.FlowCountMapper

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Map stage of the traffic-sum job.
 *
 * <p>Splits each input line on tabs, takes the phone number (second field) as the output key and
 * wraps the upstream traffic (third field from the end) and downstream traffic (second field from
 * the end) in a {@link FlowBean} as the output value.
 */
public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    /**
     * Fewest tab-separated fields a usable line can have: the phone sits at index 1 and the
     * upstream traffic at {@code length - 3}, so the two only refer to different fields from
     * five fields upwards.
     */
    private static final int MIN_FIELDS = 5;

    /**
     * Emits {@code (phone, FlowBean)} for one log line.
     *
     * @param key offset key supplied by the input format; not used
     * @param value one line of the traffic log
     * @param context sink for the output pair
     * @throws IOException if the framework fails to write the output
     * @throws InterruptedException if the task is interrupted while writing
     * @throws NumberFormatException if a traffic field is not a valid {@code int}
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split("\t");

        // Blank or truncated lines would otherwise fail with ArrayIndexOutOfBoundsException;
        // skip them but keep them visible through a job counter.
        if (fields.length < MIN_FIELDS) {
            context.getCounter("FlowCount", "MALFORMED_LINES").increment(1);
            return;
        }

        String phone = fields[1];
        // Traffic columns are addressed from the end of the line, as in the original code.
        int upFlow = Integer.parseInt(fields[fields.length - 3]);
        int dFlow = Integer.parseInt(fields[fields.length - 2]);

        context.write(new Text(phone), new FlowBean(upFlow, dFlow, phone));
    }
}

3.FlowCountReduce

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reduce stage of the traffic-sum job: adds up all traffic records of one phone number.
 */
public class FlowCountReduce extends Reducer<Text, FlowBean, Text, FlowBean> {

    /**
     * Sums upstream and downstream traffic over all values of one key and emits the totals.
     *
     * @param key the phone number
     * @param values traffic data of every access record produced by that phone number
     * @param context sink for the output pair
     * @throws IOException if the framework fails to write the output
     * @throws InterruptedException if the task is interrupted while writing
     * @throws ArithmeticException if a total does not fit in the {@code int} fields of
     *     {@link FlowBean}
     */
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate in long so many int-sized records cannot silently wrap around.
        long upSum = 0;
        long dSum = 0;

        for (FlowBean value : values) {
            upSum += value.getUpFlow();
            dSum += value.getdFlow();
        }

        // FlowBean stores ints and derives the total itself, so verify that the total fits
        // too before narrowing; the result of this call is intentionally unused.
        Math.toIntExact(upSum + dSum);

        context.write(
                key,
                new FlowBean(Math.toIntExact(upSum), Math.toIntExact(dSum), key.toString()));
    }
}

4.JobSubmitter

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Configures and submits the traffic-sum MapReduce job.
 */
public class JobSubmitter {

    /** Input directory used when no command-line argument is given. */
    private static final String DEFAULT_INPUT_DIR = "F:\\mrdata\\flow\\input";

    /** Output directory used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT_DIR = "F:\\mrdata\\flow\\output";

    /**
     * Builds the job, waits for it to finish and exits with 0 on success or -1 on failure.
     *
     * @param args optional: {@code args[0]} is the input directory and {@code args[1]} the output
     *     directory; each falls back to its built-in default when absent
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        String inputDir = args.length > 0 ? args[0] : DEFAULT_INPUT_DIR;
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(JobSubmitter.class);

        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReduce.class);

        // Key/value types emitted by the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // Key/value types emitted by the reducer, i.e. the final output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(inputDir));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : -1);
    }
}

5.JobSubmitter程序运行统计结果【手机号 上行流量 下行流量 总流量】

13480253104	180,180,360

13502468823	7335,110349,117684

13560436666	1116,954,2070

13560439658	2034,5892,7926

13602846565	1938,2910,4848

13660577991	6960,690,7650

13719199419	240,0,240

13726230503	2481,24681,27162

13726238888	2481,24681,27162

13760778710	120,120,240

13826544101	264,0,264

13922314466	3008,3720,6728

13925057413	11058,48243,59301

13926251106	240,0,240

13926435656	132,1512,1644

15013685858	3659,3538,7197

15920133257	3156,2936,6092

15989002119	1938,180,2118

18211575961	1527,2106,3633

18320173382	9531,2412,11943

84138413	4116,1432,5548

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值