首先,自定义分区规则类(继承 Hadoop 的 Partitioner):
/**
 * Partitioner that picks the reduce task from the second {@code |}-separated
 * field of the map output key.
 *
 * <p>Keys whose second field is {@code "LATELY"} or {@code "TRUE"} map to
 * partition 0, {@code "FALSE"} maps to partition 1. Any other value, or a key
 * that has no second field at all, falls back to partition 0.
 */
public static class StepToolPartitioner extends Partitioner<Text, Text> {

    /** Lookup from the key's second field to the preferred partition index. */
    private static final Map<String, Integer> accountMap = new HashMap<String, Integer>();

    static {
        accountMap.put("LATELY", 0);
        accountMap.put("TRUE", 0);
        accountMap.put("FALSE", 1);
    }

    /**
     * Chooses the partition for a record.
     *
     * @param key           map output key; its second {@code |}-separated field selects the partition
     * @param value         map output value (not used for partitioning)
     * @param numPartitions number of reduce tasks configured for the job
     * @return a partition index in the range {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(Text key, Text value, int numPartitions) {
        String[] fields = key.toString().split("\\|");

        // A key without a second field (e.g. "abc" or "abc|") previously caused an
        // ArrayIndexOutOfBoundsException; treat it like an unknown value instead.
        Integer part = fields.length > 1 ? accountMap.get(fields[1]) : null;
        if (part == null) {
            return 0;
        }

        // Keep the result inside the configured range so a job running with fewer
        // reduce tasks than distinct partition values does not get an out-of-range index.
        return part % numPartitions;
    }
}
然后在 MapReduce 作业的 main 方法中设置分区类和 Reduce 任务数:
// Register the custom partitioner on the job.
job.setPartitionerClass(StepToolPartitioner.class);
job.setNumReduceTasks(2);// One reduce task per distinct partition value returned by getPartition (accountMap values are only 0 or 1, hence 2).
所需jar包:
commons-beanutils-1.8.0.jar
commons-cli-1.2.jar
commons-collections-3.2.2.jar
commons-configuration-1.6.jar
commons-httpclient-3.1.jar
commons-io-2.4.jar
commons-lang-2.6.jar
commons-logging-1.1.3.jar
ezmorph-1.0.6.jar
guava-11.0.2.jar
hadoop-auth-2.7.2.jar
hadoop-common-2.7.2.jar
hadoop-hdfs-2.7.2.jar
hadoop-mapreduce-client-core-2.7.2.jar
htrace-core-3.1.0-incubating.jar
httpclient-4.3.2.jar
httpcore-4.3.2.jar
json-lib-2.4-jdk15.jar
log4j-1.2.17.jar
ojdbc6.jar
protobuf-java-2.5.0.jar
servlet-api-2.5.jar
slf4j-api-1.7.10.jar