Hadoop MapReduce: Mobile Phone Traffic Statistics
(1) Business requirements:
When a user accesses the Internet from a mobile phone, data traffic is consumed. The traffic has two parts:
upstream traffic (data sent), and
downstream traffic (data received).
During network transmission, each kind of traffic is described in two ways:
packet count and traffic volume.
Each record of mobile Internet usage is keyed by the phone number, which serves as the unique identifier.
Fields actually needed:
phone number, upstream/downstream packet count, upstream/downstream total traffic.
(2) Custom data type:
implemented as DataWritable, which carries the four traffic counters (the fifth field, the phone number, becomes the key).
(3) MapReduce stages:
Map stage: read each line of the input file, extract the five required fields, and emit the phone number as the key and a DataWritable object holding the traffic counters as the value. A sample record is shown below.
Reduce stage: for each phone number, sum the traffic values (packet counts and traffic volumes) to get the totals, and write them to the output file, tab-separated.
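To make the layout concrete, here is a hypothetical tab-separated input line in the format the mapper below expects: field 1 is the phone number, and fields 6-9 are the upstream packet count, downstream packet count, upstream traffic, and downstream traffic. All values are invented and the uninteresting columns are shown as placeholders:

1363157985066	13726230503	col2	col3	col4	col5	24	27	2481	24681	200

After the reduce stage, the corresponding output line is the phone number followed by the four summed counters (here from this single record):

13726230503	24	2481	27	24681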
Custom data type DataWritable:
package hadoop.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class DataWritable implements Writable {

    // upstream (sent)
    private int upPackNum;   // upstream packet count
    private int upPayLoad;   // upstream traffic volume
    // downstream (received)
    private int downPackNum; // downstream packet count
    private int downPayLoad; // downstream traffic volume

    // Hadoop instantiates Writables by reflection, so a no-arg
    // constructor is required
    public DataWritable() {
    }

    public void set(int upPackNum, int upPayLoad, int downPackNum, int downPayLoad) {
        this.upPackNum = upPackNum;
        this.upPayLoad = upPayLoad;
        this.downPackNum = downPackNum;
        this.downPayLoad = downPayLoad;
    }

    public int getUpPackNum() {
        return upPackNum;
    }

    public int getUpPayLoad() {
        return upPayLoad;
    }

    public int getDownPackNum() {
        return downPackNum;
    }

    public int getDownPayLoad() {
        return downPayLoad;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.upPackNum = in.readInt();
        this.upPayLoad = in.readInt();
        this.downPackNum = in.readInt();
        this.downPayLoad = in.readInt();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // must be writeInt, not write: write(int) emits a single byte
        // and would not round-trip with readInt() in readFields()
        out.writeInt(upPackNum);
        out.writeInt(upPayLoad);
        out.writeInt(downPackNum);
        out.writeInt(downPayLoad);
    }

    @Override
    public String toString() {
        return upPackNum + "\t" + upPayLoad + "\t" + downPackNum
                + "\t" + downPayLoad;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + downPackNum;
        result = prime * result + downPayLoad;
        result = prime * result + upPackNum;
        result = prime * result + upPayLoad;
        return result;
    }
}
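Because readFields() must consume fields in exactly the order write() produced them, a quick local round-trip check is worth doing before running the job. Below is a minimal sketch using Hadoop's own DataOutputBuffer/DataInputBuffer; the DataWritableCheck class is not part of the project and exists only to exercise the serialization:

package hadoop.mr;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class DataWritableCheck {
    public static void main(String[] args) throws Exception {
        DataWritable before = new DataWritable();
        before.set(2, 120, 3, 180);

        // serialize exactly as the framework would
        DataOutputBuffer out = new DataOutputBuffer();
        before.write(out);

        // deserialize into a fresh instance
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        DataWritable after = new DataWritable();
        after.readFields(in);

        // both lines must print: 2	120	3	180
        System.out.println(before);
        System.out.println(after);
    }
}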
Standard structure of a complete MapReduce program:
package hadoop.mr;

/**
 * @author zzh
 * @time
 * Note: the reducer's output types are also the job's output types.
 */
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataTotalMapReduce {
    // I. Mapper class
    static class DataTotalMapper extends
            Mapper<LongWritable, Text, Text, DataWritable> {

        private Text mapOutputKey = new Text();
        private DataWritable dataWritable = new DataWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            String lineValue = value.toString();
            String[] strs = lineValue.split("\t");
            // extract the five required fields from the log line
            String phoneNum = strs[1];
            int upPackNum = Integer.valueOf(strs[6]);
            int downPackNum = Integer.valueOf(strs[7]);
            int upPayLoad = Integer.valueOf(strs[8]);
            int downPayLoad = Integer.valueOf(strs[9]);
            mapOutputKey.set(phoneNum);
            dataWritable.set(upPackNum, upPayLoad, downPackNum, downPayLoad);
            context.write(mapOutputKey, dataWritable);
        }
    }
    // II. Reducer class
    static class DataTotalReducer
            extends Reducer<Text, DataWritable, Text, DataWritable> {

        private DataWritable dataWritable = new DataWritable();

        @Override
        protected void reduce(Text key, Iterable<DataWritable> values,
                Context context) throws IOException, InterruptedException {

            int upPackNum = 0;
            int downPackNum = 0;
            int upPayLoad = 0;
            int downPayLoad = 0;
            // sum all traffic counters recorded for this phone number
            for (DataWritable data : values) {
                upPackNum += data.getUpPackNum();
                downPackNum += data.getDownPackNum();
                upPayLoad += data.getUpPayLoad();
                downPayLoad += data.getDownPayLoad();
            }
            dataWritable.set(upPackNum, upPayLoad, downPackNum, downPayLoad);
            context.write(key, dataWritable);
        }
    }
    // III. Driver code
    public int run(String[] args) throws Exception {
        // ① get conf
        Configuration conf = new Configuration();
        // ② create job (Job.getInstance replaces the deprecated new Job(...))
        Job job = Job.getInstance(conf, DataTotalMapReduce.class.getSimpleName());
        // ③ set job
        job.setJarByClass(DataTotalMapReduce.class);
        // 1) input
        Path inputDir = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inputDir);
        // 2) map
        job.setMapperClass(DataTotalMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataWritable.class);
        // 3) reduce
        job.setReducerClass(DataTotalReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataWritable.class);
        // 4) output
        Path outputDir = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputDir);
        // ④ submit job and wait for it to finish
        boolean isSuccess = job.waitForCompletion(true);
        // ⑤ return status
        return isSuccess ? 0 : 1;
    }
    // IV. Run MapReduce
    public static void main(String[] args) throws Exception {
        // ① set args (hard-coded for testing; they override anything
        //    passed on the command line)
        args = new String[] {
                // input path
                "hdfs://hadoop-master:9000/data/wc/input",
                // output path
                "hdfs://hadoop-master:9000/data/wc/output"
        };
        // ② run job
        int status = new DataTotalMapReduce().run(args);
        // ③ exit
        System.exit(status);
    }
}
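One practical pitfall: FileOutputFormat aborts the job if the output directory already exists. A minimal guard, as a sketch, that could go in run() just before setOutputPath (it assumes an extra import of org.apache.hadoop.fs.FileSystem):

        // delete a stale output directory so repeated runs do not fail;
        // the second argument enables recursive delete
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputDir)) {
            fs.delete(outputDir, true);
        }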