大数据学习-Hadoop04-MapReduce-03-序列化&全排序

实验1:序列化

统计手机号耗费的总上行流量、下行流量、总流量

输入数据

数据格式:手机号 上传流量 下载流量

13965590001	13691	927138
13965590004	6925	647540
13965590002	20479	922428
13965590005	15883	57866
13965590003	13609	920184
13965590002	17979	386331
13965590006	3948	318871
13965590000	16823	935836
13965590006	10849	568449
13965590005	19065	791606
13965590008	13278	596751
13965590000	8468	400582
13965590000	16329	999656
13965590001	16797	642545
13965590007	19192	420749
13965590007	10598	381767
13965590007	7906	30797
13965590007	7914	348779
13965590002	22510	572017
13965590002	2786	441342
13965590008	7989	149352
13965590003	9686	257706

输出结果

数据格式:手机号 上传总流量 下载总流量 总流量

13965590000	41620	2336074	2377694
13965590001	30488	1569683	1600171
13965590002	63754	2322118	2385872
13965590003	23295	1177890	1201185
13965590004	6925	647540	654465
13965590005	34948	849472	884420
13965590006	14797	887320	902117
13965590007	45610	1182092	1227702
13965590008	21267	746103	767370

FlowBean.class

public class FlowBean implements Writable{

	/* Upstream (upload) traffic. */
	private long upFlow;
	/* Downstream (download) traffic. */
	private long downFlow;
	/* Total traffic: upFlow + downFlow. */
	private long sumFlow;

	/** No-arg constructor — required by Hadoop to instantiate the bean during deserialization. */
	public FlowBean() {
	}

	public FlowBean(long upFlow, long downFlow) {
		setFlowBean(upFlow, downFlow);
	}

	/** Re-initializes all three fields; lets one bean instance be reused across map() calls. */
	public void setFlowBean(long upFlow, long downFlow) {
		this.upFlow = upFlow;
		this.downFlow = downFlow;
		this.sumFlow = upFlow + downFlow;
	}

	/**
	 * Deserialization. NOTE: the field order must exactly match the order in write().
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.upFlow = in.readLong();
		this.downFlow = in.readLong();
		this.sumFlow = in.readLong();
	}

	/**
	 * Serialization.
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeLong(upFlow);
		out.writeLong(downFlow);
		out.writeLong(sumFlow);
	}

	@Override
	public String toString() {
		// Tab-separated so job output matches the tab-separated input column format.
		return upFlow + "\t" + downFlow + "\t" + sumFlow;
	}

	// Accessors — the original elided these behind a "// get and set..." placeholder,
	// but getUpFlow/getDownFlow are required by FlowReduce and getSumFlow by compareTo.
	public long getUpFlow() {
		return upFlow;
	}

	public void setUpFlow(long upFlow) {
		this.upFlow = upFlow;
	}

	public long getDownFlow() {
		return downFlow;
	}

	public void setDownFlow(long downFlow) {
		this.downFlow = downFlow;
	}

	public long getSumFlow() {
		return sumFlow;
	}

	public void setSumFlow(long sumFlow) {
		this.sumFlow = sumFlow;
	}

}

FlowMapper.class

public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
	// Output objects are reused across map() calls to avoid per-record allocation.
	Text k = new Text();
	FlowBean v = new FlowBean();

	/**
	 * Parses one tab-separated line "phone \t upFlow \t downFlow" and emits
	 * (phone, FlowBean) so the shuffle groups records by phone number.
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
		// Example line: 13965590002	916254	62132
		String[] fields = value.toString().split("\t");

		// Key is the phone number; value carries both flow counters.
		k.set(fields[0]);
		v.setFlowBean(Long.parseLong(fields[1]), Long.parseLong(fields[2]));

		context.write(k, v);
	}

}

FlowReduce.class

public class FlowReduce extends Reducer<Text, FlowBean, Text, FlowBean> {
	/**
	 * Sums the upstream and downstream flow of all records for one phone number
	 * and emits (phone, aggregated FlowBean).
	 */
	@Override
	protected void reduce(Text key, Iterable<FlowBean> values, Context context)
			throws IOException, InterruptedException {
		// BUG FIX: accumulators were `int` (misspelled sunUpFlow/sunDownFlow);
		// FlowBean stores flows as long, so int could silently overflow on large inputs.
		long sumUpFlow = 0;
		long sumDownFlow = 0;
		for (FlowBean flowBean : values) {
			sumUpFlow += flowBean.getUpFlow();
			sumDownFlow += flowBean.getDownFlow();
		}
		// The constructor derives sumFlow = up + down internally.
		FlowBean v = new FlowBean(sumUpFlow, sumDownFlow);
		context.write(key, v);
	}

}

FlowDriver.class

public class FlowDriver implements Tool {
	// Hadoop configuration; injected by ToolRunner through setConf().
	Configuration conf = null;

	@Override
	public Configuration getConf() {
		return conf;
	}

	@Override
	public void setConf(Configuration configuration) {
		this.conf = configuration;
	}

	/**
	 * Configures and submits the flow-aggregation job.
	 * args[0] = input path, args[1] = output path (must not already exist).
	 *
	 * @return 0 on success, 1 on failure
	 */
	@Override
	public int run(String[] args) throws Exception {
		Job job = Job.getInstance(conf);

		// Locate the job jar via this driver class.
		job.setJarByClass(getClass());

		// Wire up mapper and reducer.
		job.setMapperClass(FlowMapper.class);
		job.setReducerClass(FlowReduce.class);

		// Map output key/value types.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);

		// Final (reducer) output key/value types.
		// BUG FIX: the original called setOutputKeyClass twice — the second call
		// overwrote the key class with FlowBean and the value class was never set.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);

		// Input and output paths.
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Submit and wait.
		boolean code = job.waitForCompletion(true);
		return code ? 0 : 1;
	}

	public static void main(String[] args) throws Exception {
		// BUG FIX: the original unconditionally replaced the command-line arguments.
		// Fall back to the local demo paths only when none were supplied.
		if (args.length < 2) {
			args = new String[] { "e:/input/flow1", "e:/output/flow2" };
		}
		int run = ToolRunner.run(new FlowDriver(), args);
		System.exit(run);
	}

}

实验2:全排序

对实验1产生的结果再次对总流量进行倒序排序(全排序)

  • 原理
    MR程序在处理数据的过程中会对数据排序(map输出的kv对传输到reduce之前,会排序),排序的依据是map输出的key 所以,我们如果要实现自己需要的排序规则,则可以考虑将排序因素放到key中,让key实现接口:WritableComparable。然后重写key的compareTo方法。

输入数据

数据格式:手机号 上传总流量 下载总流量 总流量(即实验1的输出,共4列)

13965590000	41620	2336074	2377694
13965590001	30488	1569683	1600171
13965590002	63754	2322118	2385872
13965590003	23295	1177890	1201185
13965590004	6925	647540	654465
13965590005	34948	849472	884420
13965590006	14797	887320	902117
13965590007	45610	1182092	1227702
13965590008	21267	746103	767370

输出数据

手机号 上传总流量 下载总流量 总流量(降序)

13965590002	63754	2322118	2385872
13965590000	41620	2336074	2377694
13965590001	30488	1569683	1600171
13965590007	45610	1182092	1227702
13965590003	23295	1177890	1201185
13965590006	14797	887320	902117
13965590005	34948	849472	884420
13965590008	21267	746103	767370
13965590004	6925	647540	654465

FlowBean.class

在实验1 的基础上,进行修改,实现 WritableComparable接口

public class FlowBean implements WritableComparable<FlowBean> {
	/**
	 * Orders beans by total flow, descending, so the shuffle sort yields the
	 * required output order.
	 *
	 * BUG FIX: the original returned only 0 or 1 and never a negative value,
	 * violating the Comparable contract (sgn(a.compareTo(b)) must equal
	 * -sgn(b.compareTo(a))) and treating unequal totals as equal — sort results
	 * were undefined. Long.compare with the operands swapped gives a correct
	 * descending comparison without subtraction-overflow risk.
	 */
	@Override
	public int compareTo(FlowBean fb) {
		return Long.compare(fb.getSumFlow(), this.getSumFlow());
	}
}

FlowSortMapper.class

  • 实验1:Mapper<LongWritable, Text, Text, FlowBean>
  • 实验2:Mapper<LongWritable, Text, FlowBean, Text>
  • 排序的依据是map输出的key,所以实验2 map的key为FlowBean类型。
public class FlowSortMapper extends Mapper<LongWritable, Text, FlowBean, Text> {
	// Reused output objects; note the roles are swapped relative to experiment 1:
	// FlowBean is the KEY so the shuffle sorts records by its compareTo().
	Text v = new Text();
	FlowBean k = new FlowBean();

	/**
	 * Parses one tab-separated line "phone \t upFlow \t downFlow ..." and emits
	 * (FlowBean, phone) so records are sorted by total flow during the shuffle.
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
		// Example line: 13965590002 916254 62132
		String[] fields = value.toString().split("\t");

		// Value is the phone number; key carries the flow counters used for sorting.
		v.set(fields[0]);
		k.setFlowBean(Long.parseLong(fields[1]), Long.parseLong(fields[2]));

		context.write(k, v);
	}

}

FlowSortReduce.class

public class FlowSortReduce extends Reducer<FlowBean, Text, Text, FlowBean> {
	/**
	 * The sorting already happened during the shuffle (keyed by FlowBean); this
	 * reducer only swaps key and value back so output reads "phone \t flows".
	 * Phones with an identical total flow arrive grouped under a single key,
	 * hence the loop over values.
	 */
	@Override
	protected void reduce(FlowBean key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		for (Text phone : values) {
			context.write(phone, key);
		}
	}
}

FlowSortDriver.class

// Wire up the sort job's mapper and reducer.
job.setMapperClass(FlowSortMapper.class);
job.setReducerClass(FlowSortReduce.class);

// Map output types: FlowBean is the KEY here, so the shuffle sorts records
// by FlowBean.compareTo() (total flow, descending).
job.setMapOutputKeyClass(FlowBean.class);
job.setMapOutputValueClass(Text.class);
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值