1、为何使用Partitioner,主要是想reduce的结果能够根据key再次分类输出到不同的文件夹中。
2、结果能够直观,同时做到对数据结果的简单的统计分析。
1、输入的数据文件内容如下(1条数据内容少,1条数据内容超长,3条数据内容正常):
kaka 1 28
hua 0 26
chao 1
tao 1 22
mao 0 29 22
2、目的是为了分别输出结果,正确的结果输出到一个文本,太短的数据输出到一个文本,太长的输出到一个文本,共三个文本输出。
代码
package com.partition;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

/**
 * Demonstrates a custom {@link Partitioner} (old {@code mapred} API).
 *
 * <p>Each input line is classified by field count into one of three keys —
 * {@code "long"}, {@code "short"}, {@code "right"} — and the partitioner routes
 * each key to its own reduce task, so the job writes three separate output
 * files (part-00000 = long, part-00001 = short, part-00002 = right).
 */
public class MyPartitioner {

    /**
     * Mapper: tags every input line with a category key based on how many
     * tab-separated fields it contains.
     *
     * <p>NOTE(review): the split delimiter is {@code "\t"}, but the sample data
     * shown in the surrounding notes looks space-separated — confirm the real
     * input files are tab-delimited, or records will all be classified "short".
     */
    public static class MyMap extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value,
                        OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            String[] fields = value.toString().split("\t");

            // Category key: >3 fields = too long, <3 = too short, ==3 = correct.
            Text category = new Text();
            if (fields.length > 3) {
                category.set("long");
            } else if (fields.length < 3) {
                category.set("short");
            } else {
                category.set("right");
            }

            // The value is always the unmodified input line.
            output.collect(category, new Text(value));
        }
    }

    /**
     * Routes each category key to a fixed reduce task. The {@code % numPartitions}
     * keeps the result valid even if the job is configured with fewer than
     * three reducers.
     */
    public static class MyPartitionerPar implements Partitioner<Text, Text> {

        @Override
        public int getPartition(Text key, Text value, int numPartitions) {
            int result = 0;
            if (key.toString().equals("long")) {
                result = 0 % numPartitions;
            } else if (key.toString().equals("short")) {
                result = 1 % numPartitions;
            } else if (key.toString().equals("right")) {
                result = 2 % numPartitions;
            }
            return result;
        }

        @Override
        public void configure(JobConf conf) {
            // No configuration needed.
        }
    }

    /**
     * Reducer: passes every (category, line) pair through unchanged.
     *
     * <p>Fix: the original built {@code new Text(values.next().getBytes())}.
     * {@code Text.getBytes()} exposes the internal backing array, which may be
     * longer than {@code getLength()}, so that copy could append stale garbage
     * bytes to the output. Collecting the {@code Text} directly is safe because
     * the old-API collector serializes the value immediately.
     */
    public static class MyReduce extends MapReduceBase
            implements Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterator<Text> values,
                           OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            while (values.hasNext()) {
                output.collect(key, values.next());
            }
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MyPartitioner.class);
        conf.setJobName("MyPartitioner");
        // Three reducers: one output file per category (long / short / right).
        conf.setNumReduceTasks(3);

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setPartitionerClass(MyPartitionerPar.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(MyMap.class);
        conf.setReducerClass(MyReduce.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}