2. MapReduce 程序统计上行、下行流量
①FlowBean类:
package cn.mr.flowSum;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import com.sun.corba.se.impl.oa.toa.TOA;
/**
 * Per-phone traffic record (upstream, downstream and total flow) that can be
 * shipped between Hadoop tasks through the {@link WritableComparable} contract.
 *
 * <p>Natural ordering is by {@code total_flow} descending; ties are broken by
 * phone number, then up flow, then down flow, so that {@code compareTo}
 * returns 0 only for beans that are also {@link #equals(Object) equal}.
 *
 * <p>Note: the individual setters do not recompute {@code total_flow}; only
 * the three-argument constructor derives it from up + down.
 */
public class FlowBean implements WritableComparable<FlowBean> {

    private String phoneNUM;
    private long up_flow;
    private long down_flow;
    private long total_flow;

    /** No-arg constructor, required so the framework can instantiate the bean reflectively. */
    public FlowBean() {}

    /**
     * Creates a bean and derives the total as {@code up_flow + down_flow}.
     *
     * @param phoneNUM  phone number this record belongs to
     * @param up_flow   upstream traffic
     * @param down_flow downstream traffic
     */
    public FlowBean(String phoneNUM, long up_flow, long down_flow) {
        this.phoneNUM = phoneNUM;
        this.up_flow = up_flow;
        this.down_flow = down_flow;
        this.total_flow = up_flow + down_flow;
    }

    public long getTotal_flow() {
        return total_flow;
    }

    public void setTotal_flow(long total_flow) {
        this.total_flow = total_flow;
    }

    public String getPhoneNUM() {
        return phoneNUM;
    }

    public void setPhoneNUM(String phoneNUM) {
        this.phoneNUM = phoneNUM;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }

    public long getDown_flow() {
        return down_flow;
    }

    public void setDown_flow(long down_flow) {
        this.down_flow = down_flow;
    }

    /**
     * Restores the fields from {@code datainput}; the read order must mirror
     * {@link #write(DataOutput)}.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput datainput) throws IOException {
        phoneNUM = datainput.readUTF();
        up_flow = datainput.readLong();
        down_flow = datainput.readLong();
        total_flow = datainput.readLong();
    }

    /**
     * Serializes the fields to {@code dataoutput} in the order phone number,
     * up flow, down flow, total flow.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput dataoutput) throws IOException {
        // writeUTF(null) throws NullPointerException; a bean built with the
        // no-arg constructor is written with an empty phone number instead.
        dataoutput.writeUTF(phoneNUM == null ? "" : phoneNUM);
        dataoutput.writeLong(up_flow);
        dataoutput.writeLong(down_flow);
        dataoutput.writeLong(total_flow);
    }

    /** Tab-separated rendering: phone number, up flow, down flow, total flow. */
    @Override
    public String toString() {
        return "" + phoneNUM + '\t' + up_flow + '\t' + down_flow + '\t' + total_flow;
    }

    /**
     * Orders beans by total flow, largest first.
     *
     * <p>Ties fall through to phone number, up flow and down flow so the result
     * is antisymmetric (as {@link Comparable} requires) while beans for
     * different phones with the same total still never compare as equal.
     */
    @Override
    public int compareTo(FlowBean o) {
        // Arguments are swapped to get descending order on the total.
        int byTotal = Long.compare(o.getTotal_flow(), total_flow);
        if (byTotal != 0) {
            return byTotal;
        }
        int byPhone = compareNullable(phoneNUM, o.phoneNUM);
        if (byPhone != 0) {
            return byPhone;
        }
        int byUp = Long.compare(up_flow, o.up_flow);
        if (byUp != 0) {
            return byUp;
        }
        return Long.compare(down_flow, o.down_flow);
    }

    /** Two beans are equal when all four fields match. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof FlowBean)) {
            return false;
        }
        FlowBean other = (FlowBean) obj;
        return total_flow == other.total_flow
                && up_flow == other.up_flow
                && down_flow == other.down_flow
                && (phoneNUM == null ? other.phoneNUM == null : phoneNUM.equals(other.phoneNUM));
    }

    /** Value-based hash over the same fields used by {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        int result = phoneNUM == null ? 0 : phoneNUM.hashCode();
        result = 31 * result + (int) (up_flow ^ (up_flow >>> 32));
        result = 31 * result + (int) (down_flow ^ (down_flow >>> 32));
        result = 31 * result + (int) (total_flow ^ (total_flow >>> 32));
        return result;
    }

    /** Null-safe string comparison; {@code null} sorts before any non-null value. */
    private static int compareNullable(String a, String b) {
        if (a == null) {
            return b == null ? 0 : -1;
        }
        if (b == null) {
            return 1;
        }
        return a.compareTo(b);
    }
}
②Mapper 程序:
package cn.mr.flowSum;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
* FlowBean 是我们自定义的一种数据类型,要在hadoop的各个节点之间传输,应该遵循hadoop的序列化机制
* 就必须实现hadoop相应的序列化接口
*
*/
public class flowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean>{
protected void map(LongWritable key, Text value, Mapper<LongWritable,Text,Text,FlowBean>.Context context) throws IOException ,InterruptedException {
String line = value.toString();
//tab进行分割
String []fields = StringUtils.split(line, "\t");
//取第一个为phonnum
String phoneNUM = fields[0];
long up_flow = Long.parseLong(fields[1]);
long down_flow = Long.parseLong(fields[2]);
//定义flowbean对象,并进行初始化
FlowBean flowBean = new FlowBean(phoneNUM,up_flow,down_flow);
context.write(new Text(phoneNUM), flowBean);
};
}
③Reducer 程序:
package cn.mr.flowSum;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class flowSumReducer extends Reducer<Text, FlowBean, Text, FlowBean>{
protected void reduce(Text key, Iterable<FlowBean> flowBeans, Context context) throws IOException ,InterruptedException {
long up_flow_sum = 0;
long down_flow_sum = 0;
for(FlowBean flowBean:flowBeans){
String phoneNUM = flowBean.getPhoneNUM();
up_flow_sum += flowBean.getUp_flow();
down_flow_sum += flowBean.getDown_flow();
}
//这里value写入文本的格式是 "" + phoneNUM + '\t' + up_flow + '\t' + down_flow + '\t' + total_flow
//FlowBean方法的toString()
context.write(key, new FlowBean(key.toString(), up_flow_sum, down_flow_sum));
};
}
④程序入口:
package cn.mr.flowSum;
import java.io.FileOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Command-line entry point that configures and submits the flow-summing job.
 *
 * <p>Usage: {@code flowSumRunner <input path> <output path>}
 */
public class flowSumRunner extends Configured implements Tool {

    /** Exit code returned when the command-line arguments are invalid. */
    private static final int EXIT_USAGE = 2;

    /**
     * Builds the job from the configuration injected by {@link ToolRunner} and
     * waits for it to finish.
     *
     * @param as {@code as[0]} is the input path, {@code as[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are invalid
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] as) throws Exception {
        if (as == null || as.length < 2) {
            System.err.println("Usage: flowSumRunner <input path> <output path>");
            return EXIT_USAGE;
        }

        // Use the configuration ToolRunner set on this Tool so that generic
        // options (-D, -conf, ...) given on the command line are honoured;
        // fall back to a fresh one only if run() is invoked without setConf().
        Configuration conf = getConf();
        if (conf == null) {
            conf = new Configuration();
        }

        Job job = Job.getInstance(conf);
        job.setJarByClass(flowSumRunner.class);

        job.setMapperClass(flowSumMapper.class);
        job.setReducerClass(flowSumReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(as[0]));
        FileOutputFormat.setOutputPath(job, new Path(as[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Runs the tool through {@link ToolRunner} and exits with its return code. */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new flowSumRunner(), args);
        System.exit(res);
    }
}