package org.apache.hadoop.mapred;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
public interface Partitioner<K2, V2> extends JobConfigurable {
/**
 * The single method of this interface: decides which partition a
 * key/value record belongs to.
 *
 * @param key the record's key
 * @param value the record's value
 * @param numPartitions the total number of partitions available
 * @return the partition number for the given record
 */
int getPartition(K2 key, V2 value, int numPartitions);
}
//The default partitioner is the hash partitioner.
//Below is the source code of the hash partitioner.
package org.apache.hadoop.mapred.lib;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.JobConf;
//HashPartitioner implements the Partitioner interface and provides getPartition.
/**
 * Partitions records by the hash code of their key, so that records
 * with equal keys are always routed to the same partition (and thus
 * the same reduce task).
 */
public class HashPartitioner<K2, V2> implements Partitioner<K2, V2> {

  /** Hash partitioning needs no job configuration; intentionally a no-op. */
  public void configure(JobConf job) {}

  /**
   * Computes the target partition from the key's hash code.
   *
   * @param key the record's key; only its {@code hashCode()} is used
   * @param value the record's value (ignored)
   * @param numReduceTasks the total number of partitions
   * @return a partition index in the range {@code [0, numReduceTasks)}
   */
  public int getPartition(K2 key, V2 value, int numReduceTasks) {
    // hashCode() may be negative; ANDing with Integer.MAX_VALUE clears
    // the sign bit so the modulo result is never negative.
    int nonNegativeHash = key.hashCode() & Integer.MAX_VALUE;
    return nonNegativeHash % numReduceTasks;
  }
}