Shuffle机制
Shuffle是在Mapper之后,Reducer之前的操作
分区
默认分区时,若numReduceTask>1,会根据所求key的hashcode值进行分区
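默认的HashPartitioner按 (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks 计算分区号;与Integer.MAX_VALUE做按位与的目的是把符号位清零,防止hashCode为负数时得到负的分区号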
分区时按照条件的不同进行分区,有几个分区就会有几个reduce
若numReduceTask=1,则不会调用分区器,所有数据都进入0号分区,最终只输出一个结果文件(part-r-00000)
分区主要需要写四个类,分别是partitioner类,mapper类,reducer类,driver类
导包的时候选择包名较长的org.apache.hadoop.mapreduce(新API),不要导入包名较短的org.apache.hadoop.mapred(旧API)
/**
 * Partitioner class: chooses a partition from the first three characters of
 * the key, which holds a phone number.
 *
 * <p>The prefixes "136", "137", "138" and "139" map to partitions 0 to 3 (one
 * per province); every other key falls into partition 4.
 */
public class ProvicePartitioner extends Partitioner<Text, FlowBean> {

    /** Catch-all partition for keys that match none of the known prefixes. */
    private static final int DEFAULT_PARTITION = 4;

    /**
     * Returns the partition number for a record.
     *
     * @param key           the phone number; only its leading characters are inspected
     * @param value         the record value; not used for partitioning
     * @param numPartitions the number of partitions of the job; not consulted here,
     *                      the result is always in the range 0..4
     *                      (NOTE(review): presumably the job must be configured with
     *                      at least five reduce tasks - confirm in the driver)
     * @return 0 for "136", 1 for "137", 2 for "138", 3 for "139", otherwise 4
     */
    @Override
    public int getPartition(Text key, FlowBean value, int numPartitions) {
        String phone = key.toString();

        // startsWith() is false for keys shorter than three characters, so a
        // malformed or empty key lands in the default partition instead of
        // failing with StringIndexOutOfBoundsException as substring(0, 3) would.
        if (phone.startsWith("136")) {
            return 0;
        }
        if (phone.startsWith("137")) {
            return 1;
        }
        if (phone.startsWith("138")) {
            return 2;
        }
        if (phone.startsWith("139")) {
            return 3;
        }
        return DEFAULT_PARTITION;
    }
}
//FlowBean类
//实现Writable,并重写write和readFields方法
public class FlowBean implements Writable {
//定义upFlow,downFlow,sumFlow属性
private long upFlow;
private long downFlow;
private long sumFlow;
//提供一个空参构造器
public FlowBean() {
}
//提供属性的getter,setter方法
public long getUpFlow() {
return upFlow;
}
public void setUpFlow(long upFlow) {
this.upFlow = upFlow;
}
public long getDownFlow() {
return downFlow;
}
public void setDownFlow(long downFlow) {
this.downFlow = downFlow;
}
public long getSumFlow() {
return sumFlow;
}
public void setSumFlow(long sumFlow) {
this.sumFlow = sumFlow;
}
//重载setSumFlow方法
public void setSumFlow() {
this.sumFlow = this.upFlow + this.downFlow;
}
@Override
public void write(DataOutput out) throws IOException {
out.writeLong(this.upFlow);
out.writeLong(this.downFlow);
out.writeLong(this.sumFlow);
}
@Override
public void readFields(DataInput in) throws IOException {
this.upFlow = in.readLong();
this.downFlow = in.