Without further ado, let's get straight to it.
Sample data: the original sample file is not reproduced here. The code below assumes tab-separated lines in which field 1 (0-based) is the phone number and fields 7 and 8 are the upstream and downstream traffic.
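A line in that shape might look like the following (every value here is made up purely to illustrate the layout, not taken from the real file):

1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	24	27	2481	24681	200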
Running the program on Hadoop:
After writing the code, package it into a jar and copy it to the virtual machine.
Run it with:
hadoop jar <path to the jar> <class containing main> <input path> <output path>
For this program the command is:
hadoop jar /home/hadoop/wordcount.jar com.lmy.flowsum.SortMR /data/china_mobile.dat /data/wordcount1
This computes per-phone traffic statistics over the sample.
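Note that the output directory must not already exist, or the job fails at submission. Once the job completes, you can inspect the result straight from HDFS (the path matches the command above; with the default single reducer the result lands in part-r-00000):

hadoop fs -cat /data/wordcount1/part-r-00000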
Unsorted version first. Start by declaring a bean class that Hadoop can serialize and compare:
package com.lmy.flowsum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

    private String phoneNumber;
    private long upflow;
    private long downflow;
    private long sumflow;

    /**
     * No-arg constructor. Hadoop needs this so it can instantiate
     * the bean reflectively before calling readFields().
     */
    public FlowBean() {
    }

    /**
     * Parameterized constructor
     * @param phoneNumber
     * @param upflow
     * @param downflow
     */
    public FlowBean(String phoneNumber, long upflow, long downflow) {
        this.phoneNumber = phoneNumber;
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = upflow + downflow;
    }

    public String getPhoneNumber() {
        return phoneNumber;
    }

    public void setPhoneNumber(String phoneNumber) {
        this.phoneNumber = phoneNumber;
    }

    public long getUpflow() {
        return upflow;
    }

    public void setUpflow(long upflow) {
        this.upflow = upflow;
    }

    public long getDownflow() {
        return downflow;
    }

    public void setDownflow(long downflow) {
        this.downflow = downflow;
    }

    public long getSumflow() {
        return sumflow;
    }

    public void setSumflow(long sumflow) {
        this.sumflow = sumflow;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Deserialization: the read order must exactly match the write order below
        phoneNumber = in.readUTF();
        upflow = in.readLong();
        downflow = in.readLong();
        sumflow = in.readLong();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Serialization: called by the framework whenever the bean crosses the shuffle
        out.writeUTF(phoneNumber);
        out.writeLong(upflow);
        out.writeLong(downflow);
        out.writeLong(sumflow);
    }

    @Override
    public String toString() {
        return upflow + "\t" + downflow + "\t" + sumflow;
    }

    @Override
    public int compareTo(FlowBean o) {
        // Sort by total traffic, descending
        if (sumflow != o.getSumflow()) {
            return sumflow > o.getSumflow() ? -1 : 1;
        }
        // Tie-break on the phone number so compareTo() honors its contract and
        // beans with equal traffic but different phones aren't treated as equal keys
        return phoneNumber.compareTo(o.getPhoneNumber());
    }
}
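Before wiring the bean into a job, it is worth sanity-checking that write() and readFields() agree on field order, since Hadoop relies on that whenever the bean crosses the shuffle. A minimal local round-trip sketch (the class name and sample values are mine, not from the original post):

package com.lmy.flowsum;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class FlowBeanRoundTripTest {
    public static void main(String[] args) throws Exception {
        FlowBean original = new FlowBean("13726230503", 2481, 24681);

        // Serialize the same way the MapReduce framework does
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize into a fresh bean built via the no-arg constructor
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        // Expect: 13726230503	2481	24681	27162
        System.out.println(copy.getPhoneNumber() + "\t" + copy);
    }
}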
Next, the map and reduce programs. First the mapper:
package com.lmy.flowsum;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line is one log record, tab-separated
        String line = value.toString();
        String[] fields = StringUtils.split(line, "\t");
        // Field 1 is the phone number; fields 7 and 8 are the up/down traffic
        String phoneNumber = fields[1];
        long upflow = Long.parseLong(fields[7]);
        long downflow = Long.parseLong(fields[8]);
        context.write(new Text(phoneNumber), new FlowBean(phoneNumber, upflow, downflow));
    }
}
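Real log files often contain short or malformed lines that would crash the task with an ArrayIndexOutOfBoundsException or NumberFormatException. A defensive variant of the map body (a sketch, assuming the same field layout) simply skips such records:

// Inside map(): guard against malformed records before parsing
String[] fields = StringUtils.split(value.toString(), "\t");
if (fields == null || fields.length < 9) {
    return; // line empty or too short, skip it
}
try {
    long upflow = Long.parseLong(fields[7]);
    long downflow = Long.parseLong(fields[8]);
    context.write(new Text(fields[1]), new FlowBean(fields[1], upflow, downflow));
} catch (NumberFormatException e) {
    // non-numeric traffic field, skip this record
}

Then the reducer, which adds up every record that shares a phone number: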
package com.lmy.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowSumReduce extends Reducer<Text, FlowBean, Text, FlowBean> {

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {
        long upflowCounter = 0;
        long downflowCounter = 0;
        // Accumulate the traffic of every record that shares this phone number
        for (FlowBean bean : values) {
            upflowCounter += bean.getUpflow();
            downflowCounter += bean.getDownflow();
        }
        context.write(key, new FlowBean(key.toString(), upflowCounter, downflowCounter));
    }
}
Finally, the driver program:
package com.lmy.flowsum;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class FlowSumRunner extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // Use the configuration ToolRunner has already populated,
        // rather than building a fresh one that ignores command-line options
        Configuration conf = getConf();
        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowSumRunner.class);

        job.setMapperClass(FlowSumMapper.class);
        job.setReducerClass(FlowSumReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input and output directories for the job
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new FlowSumRunner(), args);
        System.exit(res);
    }
}
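Because the reduce step is a pure sum and the map output types (Text, FlowBean) match the reduce input types, the same reducer class can optionally double as a map-side combiner to cut shuffle traffic. A one-line addition inside run() (optional, not in the original code):

// Optional: pre-aggregate per-phone traffic on the map side
job.setCombinerClass(FlowSumReduce.class);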
These programs need the Hadoop jars on the classpath; either add them to the project by hand, or let Maven resolve and download them automatically.
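If you go the Maven route, a single dependency pulls in the MapReduce client API along with its transitive jars (on Hadoop 2.x that includes the commons-lang StringUtils used above); the version here is an assumption, match it to your cluster:

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <!-- assumption: pick the version your cluster actually runs -->
    <version>2.7.3</version>
</dependency>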
Sorted version, which ranks records by total traffic:
package com.lmy.flowsum;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SortMR {

    public static class SortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] fields = StringUtils.split(line, "\t");
            String phoneNumber = fields[1];
            long upflow = Long.parseLong(fields[7]);
            long downflow = Long.parseLong(fields[8]);
            // The bean itself is the key, so the shuffle sorts records
            // with FlowBean.compareTo(), i.e. by total traffic descending
            context.write(new FlowBean(phoneNumber, upflow, downflow), NullWritable.get());
        }
    }

    public static class SortReducer extends Reducer<FlowBean, NullWritable, Text, FlowBean> {

        @Override
        protected void reduce(FlowBean key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            // Keys arrive already sorted; just unpack the phone number for output
            String phoneNumber = key.getPhoneNumber();
            context.write(new Text(phoneNumber), key);
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SortMR.class);

        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
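With the default single reduce task, part-r-00000 then holds one line per distinct bean, ordered by descending total traffic; the numbers below are purely illustrative:

13726230503	2481	24681	27162
13926435656	132	1512	1644

Two caveats: this version reads the raw log directly, so each input line becomes its own record rather than a per-phone total (to rank aggregated totals, run the unsorted job first and sort its output, adjusting the field positions); and with more than one reducer each output file is only sorted internally, so a global ranking requires a single reducer or a custom partitioner.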