Hadoop MapReduce Secondary Sort

How do you sort the values seen by a reducer?

Hadoop: The Definitive Guide, 3rd Edition gives the recipe:

1. Make the key a composite of the natural key and the natural value.
2. The sort comparator should order by the composite key, that is, the natural key
and natural value.
3. The partitioner and grouping comparator for the composite key should consider
only the natural key for partitioning and grouping.

An example:

 natural key: id

 natural value: name

 goal: within each id, sort the names in ascending order
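
For instance, with this hypothetical whitespace-separated input (id, then name):

	3 bob
	1 carol
	1 alice
	3 adam

a single-reducer run should produce names in ascending order within each id:

	1	alice
	1	carol
	3	adam
	3	bob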

Step 1: create the composite key class (the usual java.io and org.apache.hadoop imports are omitted throughout)

public static class CompositeKey implements
		WritableComparable<CompositeKey> {
	public Long id;      // natural key
	public String name;  // natural value

	public void readFields(DataInput in) throws IOException {
		// A marker byte of -1 means the field was written as null.
		// Reset the field explicitly: Hadoop reuses Writable instances,
		// so a value left over from the previous record must not survive.
		if (in.readByte() != -1) {
			this.id = in.readLong();
		} else {
			this.id = null;
		}
		if (in.readByte() != -1) {
			this.name = Text.readString(in);
		} else {
			this.name = null;
		}
	}

	public void write(DataOutput out) throws IOException {
		if (this.id == null) {
			out.writeByte(-1);
		} else {
			out.writeByte(0);
			out.writeLong(this.id);
		}
		if (this.name == null) {
			out.writeByte(-1);
		} else {
			out.writeByte(0);
			Text.writeString(out, this.name);
		}
	}

	// Full composite ordering: id first, then name. The shuffle sorts
	// map output with this ordering.
	public int compareTo(CompositeKey o) {
		int returnValue = checkNullsAndCompare(this.id, o.id);
		if (returnValue != 0) {
			return returnValue;
		}
		return checkNullsAndCompare(this.name, o.name);
	}

	// Null-safe compare; a null field sorts after any non-null one.
	@SuppressWarnings({ "unchecked", "rawtypes" })
	private int checkNullsAndCompare(Comparable object1, Comparable object2) {
		if (object1 != null && object2 != null) {
			return object1.compareTo(object2);
		} else if (object1 == null && object2 != null) {
			return 1;
		} else if (object1 != null && object2 == null) {
			return -1;
		}
		return 0;
	}

	public int hashCode() {
		final int prime = 31;
		int result = 1;
		result = prime * result
				+ ((this.id == null) ? 0 : this.id.hashCode());
		result = prime * result
				+ ((this.name == null) ? 0 : this.name.hashCode());
		return result;
	}

	public boolean equals(Object obj) {
		if (this == obj)
			return true;
		if (obj == null || getClass() != obj.getClass())
			return false;
		final CompositeKey other = (CompositeKey) obj;
		if (this.id == null ? other.id != null
				: !this.id.equals(other.id))
			return false;
		if (this.name == null ? other.name != null
				: !this.name.equals(other.name))
			return false;
		return true;
	}
}
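
Because the null markers in write and readFields must mirror each other exactly, a quick round trip is worth checking. This is a throwaway sketch (the helper method is hypothetical, not part of the job):

	// Throwaway sanity check: serialize a key and read it back
	// through the same null-marker protocol.
	public static void checkRoundTrip() throws IOException {
		CompositeKey k1 = new CompositeKey();
		k1.id = 1L;
		k1.name = "alice";
		java.io.ByteArrayOutputStream buf = new java.io.ByteArrayOutputStream();
		k1.write(new java.io.DataOutputStream(buf));
		CompositeKey k2 = new CompositeKey();
		k2.readFields(new java.io.DataInputStream(
				new java.io.ByteArrayInputStream(buf.toByteArray())));
		System.out.println(k1.equals(k2));    // expect: true
		System.out.println(k1.compareTo(k2)); // expect: 0
	}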

Step 2: create the map class; it emits the composite key as the output key and a Text holding the name as the output value

	public static class Map extends MapReduceBase implements
			Mapper<LongWritable, Text, CompositeKey, Text> {
		// Reused across calls; safe because collect() serializes the
		// key and value immediately.
		private final CompositeKey ck = new CompositeKey();
		private final Text v = new Text();

		public void map(LongWritable key, Text value,
				OutputCollector<CompositeKey, Text> output, Reporter reporter)
				throws IOException {
			// Each input line is "<id> <name>", whitespace-separated.
			String line = value.toString();
			StringTokenizer tokenizer = new StringTokenizer(line);
			String id = tokenizer.nextToken();
			String name = tokenizer.nextToken();
			ck.id = Long.valueOf(id);
			ck.name = name;
			v.set(name);
			output.collect(ck, v);
		}
	}
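
For the input line "1 alice", the mapper emits the pair ((id=1, name="alice"), "alice"): the name rides in the composite key so the shuffle can sort on it, and is duplicated in the value so the reducer has it to emit.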

Step 3: create the reduce class, which simply writes out the id and each value

	public static class Reduce extends MapReduceBase implements
			Reducer<CompositeKey, Text, LongWritable, Text> {
		private final LongWritable outKey = new LongWritable();

		public void reduce(CompositeKey key, Iterator<Text> values,
				OutputCollector<LongWritable, Text> output, Reporter reporter)
				throws IOException {
			// Grouping is by id (Step 5), so this call sees every name
			// for one id, already sorted by the composite ordering.
			while (values.hasNext()) {
				outKey.set(key.id);
				output.collect(outKey, values.next());
			}
		}
	}
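
One subtlety: because the grouping comparator compares only id, the framework re-deserializes the composite key as the iterator advances, so key.name tracks the current value. This reducer reads only key.id, which is constant within the group, so that is harmless; it also means the names could equally well be taken from the key instead of the value.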


Step 4: create the natural key partitioner, so that all records with the same id go to the same reducer

	public static class NaturalKeyPartitioner implements
			org.apache.hadoop.mapred.Partitioner<CompositeKey, Text> {
		@Override
		public int getPartition(CompositeKey key, Text value, int num) {
			// Mask the sign bit so the partition index is never negative.
			return (getHashCode(key) & Integer.MAX_VALUE) % num;
		}

		@Override
		public void configure(JobConf conf) {
		}

		// Hash only the natural key (id): every record with the same id
		// lands on the same reducer, whatever its name.
		public int getHashCode(CompositeKey key) {
			return (key.id == null) ? 0 : key.id.hashCode();
		}
	}
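
For example, with num = 2 reducers, a record with id = 5 gives Long.valueOf(5).hashCode() == 5, so it goes to partition 5 % 2 = 1; every other record with id = 5 follows it there, names notwithstanding.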

Step 5: create the natural key grouping comparator, so that all composite keys with the same id form one value group

	public static class NaturalKeyComparator extends
			org.apache.hadoop.io.WritableComparator {
		protected NaturalKeyComparator() {
			// true: create CompositeKey instances for deserialized compares
			super(CompositeKey.class, true);
		}

		// Group on the natural key only: keys with equal ids are fed to
		// the same reduce() call even though their names differ.
		public int compare(WritableComparable a, WritableComparable b) {
			CompositeKey key1 = (CompositeKey) a;
			CompositeKey key2 = (CompositeKey) b;
			return key1.id.compareTo(key2.id);
		}
	}
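
Note that no explicit sort comparator is set in Step 6: sorting falls back to CompositeKey.compareTo through the default WritableComparator. If you prefer to spell the full ordering out in the same style, a sketch (the class name is hypothetical) would be:

	public static class CompositeKeySortComparator extends
			org.apache.hadoop.io.WritableComparator {
		protected CompositeKeySortComparator() {
			super(CompositeKey.class, true);
		}

		// Full ordering: id first, then name (same as compareTo).
		public int compare(WritableComparable a, WritableComparable b) {
			return ((CompositeKey) a).compareTo((CompositeKey) b);
		}
	}

registered with conf.setOutputKeyComparatorClass(CompositeKeySortComparator.class).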

Step 6: configure the MapReduce job

	public static void main(String[] args) throws Exception {
		JobConf conf = new JobConf(SortValue.class);
		conf.setJobName("sortvalue");

		// Partition and group on the natural key (id) only, while the
		// shuffle sorts on the full composite key (id, then name).
		conf.setPartitionerClass(NaturalKeyPartitioner.class);
		conf.setOutputValueGroupingComparator(NaturalKeyComparator.class);

		conf.setMapOutputKeyClass(CompositeKey.class);
		conf.setMapOutputValueClass(Text.class);
		conf.setOutputKeyClass(LongWritable.class);
		conf.setOutputValueClass(Text.class);

		conf.setMapperClass(Map.class);
		// conf.setCombinerClass(Reduce.class); // not usable: Reduce
		// changes the key type from CompositeKey to LongWritable
		conf.setReducerClass(Reduce.class);

		conf.setInputFormat(TextInputFormat.class);
		conf.setOutputFormat(TextOutputFormat.class);
		FileInputFormat.setInputPaths(conf, new Path(args[0]));
		FileOutputFormat.setOutputPath(conf, new Path(args[1]));

		JobClient.runJob(conf);
	}
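
Packaged into a jar (sortvalue.jar is a hypothetical name), the job runs in the usual way:

	hadoop jar sortvalue.jar SortValue <input> <output>

With one reducer the output is sorted by id and, within each id, by name; with several reducers each part file is sorted the same way for the ids routed to it.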

