Hadoop MapReduce: Mobile Phone Traffic Statistics
(1) Business requirements:
When a user accesses the Internet from a mobile phone, data traffic is consumed. The traffic has two parts:
upstream traffic (data sent), and
downstream traffic (data received).
During network transmission, each kind of traffic is described in two ways:
packet count and traffic volume.
Each record of mobile Internet usage is keyed by the phone number, which serves as the unique identifier.
Fields actually needed:
phone number, upstream/downstream packet count, upstream/downstream total traffic.
(2) Custom data type:
implemented as DataWritable, which carries the four traffic counters (the fifth field, the phone number, becomes the key).
(3) MapReduce stages:
Map stage: read each line of the input file, extract the five required fields, and emit the phone number as the key and a DataWritable object holding the traffic counters as the value. A sample record is shown below.
Reduce stage: for each phone number, sum the traffic values (packet counts and traffic volumes) to get the totals, and write them to the output file, tab-separated.
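To make the layout concrete, here is a hypothetical tab-separated input line in the format the mapper below expects: field 1 is the phone number, and fields 6-9 are the upstream packet count, downstream packet count, upstream traffic, and downstream traffic. All values are invented and the uninteresting columns are shown as placeholders:

1363157985066	13726230503	col2	col3	col4	col5	24	27	2481	24681	200

After the reduce stage, the corresponding output line is the phone number followed by the four summed counters (here from this single record):

13726230503	24	2481	27	24681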
Custom data type DataWritable:
package hadoop.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class DataWritable implements Writable {

    // upstream (sent)
    private int upPackNum;   // upstream packet count
    private int upPayLoad;   // upstream traffic volume
    // downstream (received)
    private int downPackNum; // downstream packet count
    private int downPayLoad; // downstream traffic volume

    // Hadoop instantiates Writables by reflection, so a no-arg
    // constructor is required
    public DataWritable() {
    }

    public void set(int upPackNum, int upPayLoad, int downPackNum, int downPayLoad) {
        this.upPackNum = upPackNum;
        this.upPayLoad = upPayLoad;
        this.downPackNum = downPackNum;
        this.downPayLoad = downPayLoad;
    }

    public int getUpPackNum() {
        return upPackNum;
    }

    public int getUpPayLoad() {
        return upPayLoad;
    }

    public int getDownPackNum() {
        return downPackNum;
    }

    public int getDownPayLoad() {
        return downPayLoad;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.upPackNum = in.readInt();
        this.upPayLoad = in.readInt();
        this.downPackNum = in.readInt();
        this.downPayLoad = in.readInt();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // must be writeInt, not write: write(int) emits a single byte
        // and would not round-trip with readInt() in readFields()
        out.writeInt(upPackNum);
        out.writeInt(upPayLoad);
        out.writeInt(downPackNum);
        out.writeInt(downPayLoad);
    }

    @Override
    public String toString() {
        return upPackNum + "\t" + upPayLoad + "\t" + downPackNum
                + "\t" + downPayLoad;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + downPackNum;
        result = prime * result + downPayLoad;
        result = prime * result + upPackNum;
        result = prime * result + upPayLoad;
        return result;
    }
}
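Because readFields() must consume fields in exactly the order write() produced them, a quick local round-trip check is worth doing before running the job. Below is a minimal sketch using Hadoop's own DataOutputBuffer/DataInputBuffer; the DataWritableCheck class is not part of the project and exists only to exercise the serialization:

package hadoop.mr;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class DataWritableCheck {
    public static void main(String[] args) throws Exception {
        DataWritable before = new DataWritable();
        before.set(2, 120, 3, 180);

        // serialize exactly as the framework would
        DataOutputBuffer out = new DataOutputBuffer();
        before.write(out);

        // deserialize into a fresh instance
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        DataWritable after = new DataWritable();
        after.readFields(in);

        // both lines must print: 2	120	3	180
        System.out.println(before);
        System.out.println(after);
    }
}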
Standard structure of a complete MapReduce program:
package hadoop.mr;

/**
 * @author zzh
 * @time
 * Note: the reducer's output types are also the job's output types.
 */
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataTotalMapReduce {
    // I. Mapper class
    static class DataTotalMapper extends
            Mapper<LongWritable, Text, Text, DataWritable> {

        private Text mapOutputKey = new Text();
        private DataWritable dataWritable = new DataWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            String lineValue = value.toString();
            String[] strs = lineValue.split("\t");
            // extract the five required fields from the log line
            String phoneNum = strs[1];
            int upPackNum = Integer.valueOf(strs[6]);
            int downPackNum = Integer.valueOf(strs[7]);
            int upPayLoad = Integer.valueOf(strs[8]);
            int downPayLoad = Integer.valueOf(strs[9]);
            mapOutputKey.set(phoneNum);
            dataWritable.set(upPackNum, upPayLoad, downPackNum, downPayLoad);
            context.write(mapOutputKey, dataWritable);
        }
    }
    // II. Reducer class
    static class DataTotalReducer
            extends Reducer<Text, DataWritable, Text, DataWritable> {

        private DataWritable dataWritable = new DataWritable();

        @Override
        protected void reduce(Text key, Iterable<DataWritable> values,
                Context context) throws IOException, InterruptedException {

            int upPackNum = 0;
            int downPackNum = 0;
            int upPayLoad = 0;
            int downPayLoad = 0;
            // sum all traffic counters recorded for this phone number
            for (DataWritable data : values) {
                upPackNum += data.getUpPackNum();
                downPackNum += data.getDownPackNum();
                upPayLoad += data.getUpPayLoad();
                downPayLoad += data.getDownPayLoad();
            }
            dataWritable.set(upPackNum, upPayLoad, downPackNum, downPayLoad);
            context.write(key, dataWritable);
        }
    }
    // III. Driver code
    public int run(String[] args) throws Exception {
        // ① get conf
        Configuration conf = new Configuration();
        // ② create job (Job.getInstance replaces the deprecated new Job(...))
        Job job = Job.getInstance(conf, DataTotalMapReduce.class.getSimpleName());
        // ③ set job
        job.setJarByClass(DataTotalMapReduce.class);
        // 1) input
        Path inputDir = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inputDir);
        // 2) map
        job.setMapperClass(DataTotalMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataWritable.class);
        // 3) reduce
        job.setReducerClass(DataTotalReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataWritable.class);
        // 4) output
        Path outputDir = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputDir);
        // ④ submit job and wait for it to finish
        boolean isSuccess = job.waitForCompletion(true);
        // ⑤ return status
        return isSuccess ? 0 : 1;
    }
    // IV. Run MapReduce
    public static void main(String[] args) throws Exception {
        // ① set args (hard-coded for testing; they override anything
        //    passed on the command line)
        args = new String[] {
                // input path
                "hdfs://hadoop-master:9000/data/wc/input",
                // output path
                "hdfs://hadoop-master:9000/data/wc/output"
        };
        // ② run job
        int status = new DataTotalMapReduce().run(args);
        // ③ exit
        System.exit(status);
    }
}
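One practical pitfall: FileOutputFormat aborts the job if the output directory already exists. A minimal guard, as a sketch, that could go in run() just before setOutputPath (it assumes an extra import of org.apache.hadoop.fs.FileSystem):

        // delete a stale output directory so repeated runs do not fail;
        // the second argument enables recursive delete
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputDir)) {
            fs.delete(outputDir, true);
        }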