一、问题描述
根据所给的上网日志数据，统计并输出每一个手机号的上载流量、下载流量和总流量。
二、数据格式
输入数据(部分)格式（每行一条记录，字段之间以制表符分隔；下标从 0 开始，代码读取下标 1 的手机号、下标 8 的上载流量和下标 9 的下载流量。部分记录的域名和类别字段为空，但分隔符仍然保留，因此各字段的下标不变）
1363157973098 15013685858 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 rank.ie.sogou.com 搜索引擎 28 27 3659 3538 200
1363157986029 15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 www.umeng.com 站点统计 3 3 1938 180 200
1363157992093 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 15 9 918 4938 200
1363157986041 13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 3 3 180 180 200
1363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 2052.flash2-http.qq.com 综合门户 15 12 1938 2910 200
输出数据格式
手机号 上载流量(总) 下载流量(总) 总流量
三、代码实现
DataCount:
package edu.jianwei.hadoop.mr;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce job that sums, per phone number, the upload and download traffic
 * found in tab-separated log records.
 *
 * <p>Usage: {@code DataCount <input path> <output path>}
 */
public class DataCount {

    /** Index of the phone number in a tab-split input record. */
    private static final int TEL_NUM_FIELD = 1;
    /** Index of the upload traffic in a tab-split input record. */
    private static final int UPLOAD_FIELD = 8;
    /** Index of the download traffic in a tab-split input record. */
    private static final int DOWNLOAD_FIELD = 9;
    /** Minimum number of fields a record needs so that every index above is valid. */
    private static final int MIN_FIELDS = DOWNLOAD_FIELD + 1;

    /** Counter group and name used to report input lines the mapper skipped. */
    private static final String COUNTER_GROUP = "DataCount";
    private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

    /**
     * Emits one {@code (phone number, DataBean)} pair per well-formed input line.
     */
    static class DCMapper extends Mapper<LongWritable, Text, Text, DataBean> {
        // Reused across map() calls to avoid allocating per record.
        private final Text k = new Text();
        private final DataBean v = new DataBean();

        /**
         * Splits the line on tabs and forwards the phone number together with
         * its upload/download traffic.
         *
         * <p>Lines with too few fields or with non-numeric traffic values are
         * counted and skipped instead of failing the whole task.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input
         * @param context task context used to emit output and update counters
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] words = value.toString().split("\t");
            if (words.length < MIN_FIELDS) {
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }

            double upLoad;
            double downLoad;
            try {
                upLoad = Double.parseDouble(words[UPLOAD_FIELD]);
                downLoad = Double.parseDouble(words[DOWNLOAD_FIELD]);
            } catch (NumberFormatException e) {
                // A single corrupt record should not abort the job.
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }

            String telNum = words[TEL_NUM_FIELD];
            k.set(telNum);
            v.Set(telNum, upLoad, downLoad);
            context.write(k, v);
        }
    }

    /**
     * Adds up the upload and download traffic of all records sharing a phone number.
     */
    static class DCReduce extends Reducer<Text, DataBean, Text, DataBean> {
        // Reused across reduce() calls to avoid allocating per key.
        private final DataBean v = new DataBean();

        /**
         * Writes one {@code (phone number, totals)} pair for the given key.
         *
         * @param key     phone number
         * @param v2s     all beans emitted by the mappers for this phone number
         * @param context task context used to emit output
         */
        @Override
        protected void reduce(Text key, Iterable<DataBean> v2s, Context context)
                throws IOException, InterruptedException {
            double upTotal = 0;
            double downTotal = 0;
            for (DataBean d : v2s) {
                upTotal += d.getUpLoad();
                downTotal += d.getDownload();
            }
            // The phone number is already the output key, so the bean's own
            // telNum is left empty.
            v.Set("", upTotal, downTotal);
            context.write(key, v);
        }
    }

    /**
     * Configures and runs the job, exiting with a non-zero status on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: DataCount <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // Pass the configuration to the job; the original created it but never used it.
        Job job = Job.getInstance(conf);
        job.setJarByClass(DataCount.class);

        job.setMapperClass(DCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataBean.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setReducerClass(DCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataBean.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job success/failure to the calling shell.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
DataBean:
package edu.jianwei.hadoop.mr;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
public class DataBean implements Writable {
private String telNum;
private double upLoad;
private double download;
private double total;
public void Set(String telnum,double upload,double download){
this.telNum=telnum;
this.upLoad=upload;
this.download=download;
this.total=upload+download;
}
public void write(DataOutput out) throws IOException {
out.writeUTF(telNum);
out.writeDouble(upLoad);
out.writeDouble(download);
out.writeDouble(total);
}
public void readFields(DataInput in) throws IOException {
this.telNum=in.readUTF();
this.upLoad=in.readDouble();
this.download=in.readDouble();
this.total=in.readDouble();
}
public String getTelNum() {
return telNum;
}
public void setTelNum(String telNum) {
this.telNum = telNum;
}
public double getUpLoad() {
return upLoad;
}
public void setUpLoad(double upLoad) {
this.upLoad = upLoad;
}
public double getDownload() {
return download;
}
public void setDownload(double download) {
this.download = download;
}
public double getTotal() {
return total;
}
public void setTotal(double total) {
this.total = total;
}
@Override
public String toString() {
// TODO Auto-generated method stub
return upLoad+"\t"+download+"\t"+total;
}
}
四、代码运行
1.运行代码
hadoop jar /root/dc.jar /dc /dc/res
2.代码运行结果(部分)
13560436666 1116.0 954.0 2070.0
13560439658 2034.0 5892.0 7926.0
13602846565 1938.0 2910.0 4848.0
13660577991 6960.0 690.0 7650.0
13719199419 240.0 0.0 240.0
13726230503 2481.0 24681.0 27162.0