1.背景
在环形缓冲区向磁盘溢写的过程中,默认按照系统的分区计算公式进行分区;也可以根据自定义指定的字段,按照一定的规则,将<key,value>键值对输出到对应的分区中。
2.默认计算公式
(key.hashCode() & Integer.MAX_VALUE) % numReduceTasks
3.需求
将手机号输出到不同文件中
4.实现步骤
- 自定义类继承Partitioner,重写getPartition方法,返回的是分区号,符合相同条件的<key,value>进入相同分区
- 在driver类中,手动设置自定义分区类job.setPartitionerClass(…class)
- 手动设置reducetask的数量job.setNumReduceTasks(num);
5.代码实现
1.bean对象
package com.zj.practice.mapreduce06.compatablePhoneFlow;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Requirement:
 * Re-sort the aggregated output of the phone-flow case by total flow.
 *
 * Sample input (phone number, then "upflow,downflow,totalflow"):
 * 13470253144 180,180,360
 * 13509468723 7335,110349,117684
 * 13560439638 918,4938,5856
 * 13568436656 3597,25635,29232
 * 13630577991 10619,4228,14847
 * 13682846555 14477,9042,23519
 * 13729199489 240,0,240
 * 13736230513 2745,24681,27426
 * 13768778790 120,120,240
 * 13956435636 132,1512,1644
 * 13966251146 240,0,240
 * 13975057813 11058,48243,59301
 * 15910133277 3156,2936,6092
 * 15959002129 1938,180,2118
 * 18271575951 6759,4492,11251
 *
 * NOTE(review): WritableComparable requires write(DataOutput), readFields(DataInput)
 * and compareTo implementations; they are not visible in this chunk — confirm they
 * exist further down and that readFields reads fields in exactly the order write
 * writes them.
 */
public class CptFlowBean implements WritableComparable<CptFlowBean> {
/**
 * Phone number.
 */
private String phoneNum;
/**
 * Upstream (sent) traffic.
 */
private int upflow;
/**
 * Downstream (received) traffic.
 */
private int downflow;
/**
 * Total traffic (in the sample data, upflow + downflow).
 */
private int totalflow;
public CptFlowBean() {
super();
}
/**
 * Convenience constructor populating every field.
 *
 * @param phoneNum  phone number
 * @param upflow    upstream traffic
 * @param downflow  downstream traffic
 * @param totalflow total traffic
 */
public CptFlowBean(String phoneNum, int upflow, int downflow, int totalflow) {
    this.phoneNum = phoneNum;
    this.totalflow = totalflow;
    this.downflow = downflow;
    this.upflow = upflow;
}
/**
 * Renders the bean as four tab-separated fields:
 * phoneNum, upflow, downflow, totalflow.
 */
@Override
public String toString() {
    return String.format("%s\t%d\t%d\t%d", phoneNum, upflow, downflow, totalflow);
}
/**
 * @return the phone number
 */
public String getPhoneNum() {
    return this.phoneNum;
}
/**
 * @param phoneNum the phone number to store
 */
public void setPhoneNum(String phoneNum) {
    this.phoneNum = phoneNum;
}
/**
 * @return the upstream traffic
 */
public int getUpflow() {
    return this.upflow;
}
public void