把单词按照ASCII码奇偶分区(Partitioner)
1、分析
2、自定义分区
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes each word to a reduce partition by the parity of its first
 * character's code point: even code points go to partition 0, odd ones
 * to partition 1.
 *
 * <p>The driver must register this class via
 * {@code job.setPartitionerClass(WordCountPartitioner.class)} and set
 * {@code job.setNumReduceTasks(2)} so both partitions have a reducer.
 */
public class WordCountPartitioner extends Partitioner<Text, IntWritable> {

    /**
     * @param key           the word emitted by the mapper
     * @param value         the word's count (unused in the decision)
     * @param numPartitions total number of partitions (expected to be 2)
     * @return 0 when the first character's code point is even, 1 when odd
     */
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        String word = key.toString();
        // Guard against an empty key: substring/charAt on "" would throw
        // StringIndexOutOfBoundsException and fail the task.
        if (word.isEmpty()) {
            return 0;
        }
        // charAt(0) widens to int, giving the first character's code point
        // directly — no need for substring(0, 1).toCharArray().
        int first = word.charAt(0);
        return (first % 2 == 0) ? 0 : 1;
    }
}
3、在驱动中配置加载分区，设置ReduceTask个数
job.setPartitionerClass(WordCountPartitioner.class);
job.setNumReduceTasks(2);
4、基于wordcount案例
https://www.jianshu.com/p/c8726f1ccd8f
5、Code -> GitHub