Feature: traffic statistics
Count each user's traffic usage. The format of the user internet-access log to be analyzed:
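The sample log itself is not reproduced here. Judging from the mapper code below, each line is assumed to be tab-separated, with the phone number in field 1 and the upload/download byte counts in fields 8 and 9 (counting from 0). A purely hypothetical line, in which only fields 1, 8 and 9 matter to this job (all values are placeholders):

...	13712345678	...	...	...	...	...	...	2481	24681	...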
Build the business bean
package cn.itcast.mr.dc;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
 * write() and readFields() are not called by our own code: the Hadoop framework
 * invokes them whenever this bean is serialized or deserialized, e.g. when map
 * output is written to disk and shuffled to the reducers.
 * @author Administrator
 */
public class DataBean implements Writable {

    private String account;
    private long upPayload;
    private long downPayload;
    private long totalPayload;

    public String getAccount() {
        return account;
    }

    public void setAccount(String account) {
        this.account = account;
    }

    public long getUpPayload() {
        return upPayload;
    }

    public void setUpPayload(long upPayload) {
        this.upPayload = upPayload;
    }

    public long getDownPayload() {
        return downPayload;
    }

    public void setDownPayload(long downPayload) {
        this.downPayload = downPayload;
    }

    public long getTotalPayload() {
        return totalPayload;
    }

    public void setTotalPayload(long totalPayload) {
        this.totalPayload = totalPayload;
    }

    public DataBean() {
    }

    @Override
    public String toString() {
        return upPayload + "\t" + downPayload + "\t" + totalPayload;
    }

    public DataBean(String account, long upPayload, long downPayload) {
        super();
        this.account = account;
        this.upPayload = upPayload;
        this.downPayload = downPayload;
        this.totalPayload = upPayload + downPayload;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(account);
        out.writeLong(upPayload);
        out.writeLong(downPayload);
        out.writeLong(totalPayload);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.account = in.readUTF();
        this.upPayload = in.readLong();
        this.downPayload = in.readLong();
        this.totalPayload = in.readLong();
    }
}
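As the Javadoc above notes, write() and readFields() are called by the Hadoop framework whenever a DataBean is serialized or deserialized, for example when map output is spilled to disk and shuffled to the reducers. The same round trip can be reproduced locally with plain java.io streams; the following is a minimal sketch (the DataBeanRoundTrip class and its values are made up for illustration):

package cn.itcast.mr.dc;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
public class DataBeanRoundTrip {
    public static void main(String[] args) throws Exception {
        DataBean original = new DataBean("13712345678", 1024, 2048);
        // serialization: this is the moment write() runs
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));
        // deserialization: this is the moment readFields() runs
        DataBean copy = new DataBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy); // prints 1024, 2048, 3072 separated by tabs, via toString()
    }
}

The only contract to respect is that readFields() must read the fields in exactly the same order in which write() wrote them.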
Main method:
package cn.itcast.mr.dc;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DataCount {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(DataCount.class);
        // set the mapper
        job.setMapperClass(DCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataBean.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // partitioning
        job.setPartitionerClass(DCPartioner.class);
        // combiner (a special kind of reduce that runs on the map side, after map and before the reduce phase);
        // reusing DCReduce here is safe because the per-phone sums are associative
        job.setCombinerClass(DCReduce.class);
        // set the number of reduce tasks
        job.setNumReduceTasks(Integer.parseInt(args[2]));
        // set the reducer
        job.setReducerClass(DCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataBean.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
    public static class DCMapper extends Mapper<LongWritable, Text, Text, DataBean> {

        @Override
        protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
            // read one log line per call
            String line = value.toString();
            String[] splits = line.split("\t");
            // map processing: extract the fields this job needs
            String account = splits[1]; // phone number
            long upPayload = Long.parseLong(splits[8]);
            long downPayload = Long.parseLong(splits[9]);
            DataBean dataBean = new DataBean(account, upPayload, downPayload);
            context.write(new Text(account), dataBean);
        }
    }
    public static class DCReduce extends Reducer<Text, DataBean, Text, DataBean> {

        @Override
        protected void reduce(Text text, Iterable<DataBean> values, Context context) throws java.io.IOException, InterruptedException {
            // intermediate variables for the upload/download sums
            long sumUp = 0;
            long sumDown = 0;
            for (DataBean dataBean : values) {
                sumUp += dataBean.getUpPayload();
                sumDown += dataBean.getDownPayload();
            }
            // write the total once per phone number, after the loop has finished
            context.write(text, new DataBean("", sumUp, sumDown));
        }
    }
    /**
     * Partitioner, runs after the map phase: routes each record to a
     * reducer based on the phone number prefix.
     * @author Administrator
     */
    public static class DCPartioner extends Partitioner<Text, DataBean> {

        private static Map<String, Integer> map = new HashMap<>();
        static {
            map.put("137", 0);
            map.put("138", 0);
            map.put("139", 0);
            map.put("159", 1);
            map.put("150", 1);
        }

        @Override
        public int getPartition(Text key, DataBean value, int numPartitions) {
            String phone = key.toString();
            String prefix = phone.substring(0, 3);
            Integer rs = map.get(prefix);
            if (rs == null) {
                rs = 2; // any prefix not listed above goes to partition 2
            }
            return rs;
        }
    }
}
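A note on running the job: args[0] is the input path, args[1] the output path and args[2] the number of reduce tasks. Because DCPartioner can return the partition numbers 0, 1 and 2, args[2] should be at least 3; with two reducers the job would fail with an "Illegal partition" error, while with a single reducer the custom partitioner is simply bypassed. A typical invocation might look like this (jar name and paths are placeholders):

hadoop jar datacount.jar cn.itcast.mr.dc.DataCount /data/flow/input /data/flow/output 3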